diff --git common/src/java/org/apache/hive/common/util/DateUtils.java common/src/java/org/apache/hive/common/util/DateUtils.java
index 959a542..a1068ec 100644
--- common/src/java/org/apache/hive/common/util/DateUtils.java
+++ common/src/java/org/apache/hive/common/util/DateUtils.java
@@ -54,4 +54,24 @@ public static int parseNumericValueWithRange(String fieldName,
     }
     return result;
   }
+
+  // From java.util.Calendar
+  private static final String[] FIELD_NAME = {
+    "ERA", "YEAR", "MONTH", "WEEK_OF_YEAR", "WEEK_OF_MONTH", "DAY_OF_MONTH",
+    "DAY_OF_YEAR", "DAY_OF_WEEK", "DAY_OF_WEEK_IN_MONTH", "AM_PM", "HOUR",
+    "HOUR_OF_DAY", "MINUTE", "SECOND", "MILLISECOND", "ZONE_OFFSET",
+    "DST_OFFSET"
+  };
+
+  /**
+   * Returns the name of the specified calendar field.
+   *
+   * @param field the calendar field
+   * @return the calendar field name
+   * @exception IndexOutOfBoundsException if field is negative,
+   *            equal to or greater than FIELD_COUNT.
+   */
+  public static String getFieldName(int field) {
+    return FIELD_NAME[field];
+  }
 }
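For context, a minimal usage sketch of the new helper. The demo class and values are illustrative and not part of the patch; it only assumes the DateUtils addition above is on the classpath:

import java.util.Calendar;

import org.apache.hive.common.util.DateUtils;

public class FieldNameDemo {
  public static void main(String[] args) {
    // Calendar.MONTH is 2, so this prints "MONTH".
    System.out.println(DateUtils.getFieldName(Calendar.MONTH));

    // Valid indices run from 0 (ERA) to Calendar.FIELD_COUNT - 1 (DST_OFFSET);
    // anything outside that range throws ArrayIndexOutOfBoundsException,
    // a subclass of the IndexOutOfBoundsException the Javadoc documents.
    System.out.println(DateUtils.getFieldName(Calendar.FIELD_COUNT - 1)); // "DST_OFFSET"
  }
}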
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
index e52fcc0..d8164a4 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
@@ -157,7 +157,12 @@ public class extends VectorExpression {
   public void setOutputColumn(int outputColumn) {
     this.outputColumn = outputColumn;
   }
-
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
index e1df589..31a015f 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
@@ -167,6 +167,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
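All of the vectorExpressionParameters() overrides in this patch follow one pattern: each generated expression reports its operands as a short "col N" / "val X" list, which an EXPLAIN-style renderer can combine with the class name. A standalone sketch of that idea follows; the base-class and demo names are illustrative, not Hive's actual API:

// Illustrative sketch: how a per-expression parameter string composes into a
// readable display string for plan output.
abstract class SketchVectorExpression {
  public abstract String vectorExpressionParameters();

  @Override
  public String toString() {
    // e.g. "LongColAddLongColumn(col 1, col 2)"
    return getClass().getSimpleName() + "(" + vectorExpressionParameters() + ")";
  }
}

class LongColAddLongColumn extends SketchVectorExpression {
  private final int colNum1;
  private final int colNum2;

  LongColAddLongColumn(int colNum1, int colNum2) {
    this.colNum1 = colNum1;
    this.colNum2 = colNum2;
  }

  @Override
  public String vectorExpressionParameters() {
    // Same format the column-column templates emit.
    return "col " + colNum1 + ", col " + colNum2;
  }

  public static void main(String[] args) {
    System.out.println(new LongColAddLongColumn(1, 2)); // LongColAddLongColumn(col 1, col 2)
  }
}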
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt
deleted file mode 100644
index bcd10a2..0000000
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnWithConvert.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-import org.apache.hadoop.hive.ql.exec.vector.*;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Generated from template ColumnArithmeticColumnWithConvert.txt, which covers binary arithmetic
- * expressions between columns.
- */
-public class extends VectorExpression {
-
-  private static final long serialVersionUID = 1L;
-
-  private int colNum1;
-  private int colNum2;
-  private int outputColumn;
-
-  public (int colNum1, int colNum2, int outputColumn) {
-    this.colNum1 = colNum1;
-    this.colNum2 = colNum2;
-    this.outputColumn = outputColumn;
-  }
-
-  public () {
-  }
-
-  @Override
-  public void evaluate(VectorizedRowBatch batch) {
-
-    if (childExpressions != null) {
-      super.evaluateChildren(batch);
-    }
-
-    inputColVector1 = () batch.cols[colNum1];
-    inputColVector2 = () batch.cols[colNum2];
-    outputColVector = () batch.cols[outputColumn];
-    int[] sel = batch.selected;
-    int n = batch.size;
-    [] vector1 = inputColVector1.vector;
-    [] vector2 = inputColVector2.vector;
-    [] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-        inputColVector1.isRepeating && inputColVector2.isRepeating
-          || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-          || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
-    NullUtil.propagateNullsColCol(
-        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
-    /* Disregard nulls for processing. In other words,
-     * the arithmetic operation is performed even if one or
-     * more inputs are null. This is to improve speed by avoiding
-     * conditional checks in the inner loop.
-     */
-    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
-      outputVector[0] = (vector1[0]) (vector2[0]);
-    } else if (inputColVector1.isRepeating) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = (vector1[0]) (vector2[i]);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = (vector1[0]) (vector2[i]);
-        }
-      }
-    } else if (inputColVector2.isRepeating) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = (vector1[i]) (vector2[0]);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = (vector1[i]) (vector2[0]);
-        }
-      }
-    } else {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = (vector1[i]) (vector2[i]);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = (vector1[i]) (vector2[i]);
-        }
-      }
-    }
-
-    /* For the case when the output can have null values, follow
-     * the convention that the data values must be 1 for long and
-     * NaN for double. This is to prevent possible later zero-divide errors
-     * in complex arithmetic expressions like col2 / (col1 - 1)
-     * in the case when some col1 entries are null.
-     */
-    NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n);
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return outputColumn;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "";
-  }
-
-  public int getColNum1() {
-    return colNum1;
-  }
-
-  public void setColNum1(int colNum1) {
-    this.colNum1 = colNum1;
-  }
-
-  public int getColNum2() {
-    return colNum2;
-  }
-
-  public void setColNum2(int colNum2) {
-    this.colNum2 = colNum2;
-  }
-
-  public void setOutputColumn(int outputColumn) {
-    this.outputColumn = outputColumn;
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType(""),
-            VectorExpressionDescriptor.ArgumentType.getType(""))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}
-
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
index 87335f1..2cc1aa2 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
@@ -134,6 +134,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
index 0bb1532..294bb4f 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
@@ -132,6 +132,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt
deleted file mode 100644
index 105eb92..0000000
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.;
-import org.apache.hadoop.hive.ql.exec.vector.;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.*;
-
-/**
- * Generated from template ColumnArithmeticScalarWithConvert.txt, which covers binary arithmetic
- * expressions between a column and a scalar.
- */
-public class extends VectorExpression {
-
-  private static final long serialVersionUID = 1L;
-
-  private int colNum;
-  private value;
-  private int outputColumn;
-
-  public (int colNum, value, int outputColumn) {
-    this.colNum = colNum;
-    this.value = (value);
-    this.outputColumn = outputColumn;
-  }
-
-  public () {
-  }
-
-  @Override
-  public void evaluate(VectorizedRowBatch batch) {
-
-    if (childExpressions != null) {
-      super.evaluateChildren(batch);
-    }
-
-    inputColVector = () batch.cols[colNum];
-    outputColVector = () batch.cols[outputColumn];
-    int[] sel = batch.selected;
-    boolean[] inputIsNull = inputColVector.isNull;
-    boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
-    [] vector = inputColVector.vector;
-    [] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    if (inputColVector.isRepeating) {
-      outputVector[0] = (vector[0]) value;
-
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = (vector[i]) value;
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = (vector[i]) value;
-        }
-      }
-    } else /* there are nulls */ {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = (vector[i]) value;
-          outputIsNull[i] = inputIsNull[i];
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = (vector[i]) value;
-        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
-      }
-    }
-
-    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return outputColumn;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "";
-  }
-
-  public int getColNum() {
-    return colNum;
-  }
-
-  public void setColNum(int colNum) {
-    this.colNum = colNum;
-  }
-
-  public getValue() {
-    return value;
-  }
-
-  public void setValue( value) {
-    this.value = value;
-  }
-
-  public void setOutputColumn(int outputColumn) {
-    this.outputColumn = outputColumn;
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType(""),
-            VectorExpressionDescriptor.ArgumentType.getType(""))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN,
-            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
-  }
-}
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
index f2b4c81..cbc97da 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
@@ -157,6 +157,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
index 2438ee4..6568d1c 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
@@ -149,6 +149,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
index b0f6eb1..04b533a 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
@@ -184,6 +184,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
index 623bcfb..68c4f58 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
@@ -139,6 +139,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
index c6614ab..25e0d85 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
@@ -139,6 +139,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
index 841ef93..0728f6c 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
@@ -138,6 +138,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
index cf690db..efbf1ba 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
@@ -122,6 +122,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
index b52b7c7..6574267 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
@@ -124,6 +124,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
index c3d8d7e..2a9f947 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
@@ -181,6 +181,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
index d1474fb..4bbc358 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
@@ -141,6 +141,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
index 63cebaf..2e66b3a 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
@@ -170,6 +170,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
index 7aee529..e679449 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
@@ -139,6 +139,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt
index c68ac34..e23dc27 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt
@@ -155,6 +155,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt
index cb6b750..85d88fd 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt
@@ -146,6 +146,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt
index 619015e..0b7fefc 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt
@@ -119,6 +119,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
index e9aaaf2..0a9c444 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
@@ -173,7 +173,12 @@ public class extends VectorExpression {
   public void setRightValue( value) {
     this.rightValue = value;
   }
-
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", left " + leftValue + ", right " + rightValue;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
index e25b9c2..ee80606 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
@@ -33,8 +33,8 @@ public class extends VectorExpression {
 
   private static final long serialVersionUID = 1L;
 
-  private int colNum1;
-  private int colNum2;
+  protected int colNum1;
+  protected int colNum2;
 
   public (int colNum1, int colNum2) {
     this.colNum1 = colNum1;
@@ -182,6 +182,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt
index b0f6e5c..248a66a 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt
@@ -32,8 +32,8 @@ public class extends VectorExpression {
 
   private static final long serialVersionUID = 1L;
 
-  private int colNum;
-  private value;
+  protected int colNum;
+  protected value;
 
   public (int colNum, value) {
     this.colNum = colNum;
@@ -158,6 +158,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
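The private-to-protected widening in the two compare templates above is presumably what lets generated subclasses elsewhere in this patch (for example, the timestamp-scalar variants that extend a generated base class) read the operand fields when building their own parameter strings. A minimal, hypothetical sketch of that visibility point, not Hive's actual classes:

// Hypothetical sketch: a subclass can only build its parameter string from
// inherited operand fields when those fields are protected, not private.
abstract class FilterCompareBase {
  protected int colNum;  // was private before this patch
  protected long value;  // was private before this patch

  FilterCompareBase(int colNum, long value) {
    this.colNum = colNum;
    this.value = value;
  }
}

class FilterCompareSubclass extends FilterCompareBase {
  FilterCompareSubclass(int colNum, long value) {
    super(colNum, value);
  }

  public String vectorExpressionParameters() {
    // Compiles only because colNum and value are visible to subclasses.
    return "col " + colNum + ", val " + value;
  }
}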
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt
index d68edfa..4cef036 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt
@@ -155,6 +155,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", left " + leftValue.toString() + ", right " + rightValue.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt
index a2352c6..ee450d3 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt
@@ -430,6 +430,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt
index bdd39b9..9943f45 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt
@@ -145,6 +145,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt
index 0608016..4477aff 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt
@@ -145,6 +145,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value.toString() + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index f5f59c2..73c46a1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -154,6 +154,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index b7544c7..037382c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -32,8 +32,8 @@ public class extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; - private value; + protected int colNum; + protected value; public ( value, int colNum) { this.colNum = colNum; @@ -158,6 +158,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index e8049da..6cbfca1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -176,7 +178,12 @@ public class extends VectorExpression { public void setRight(byte[] value) { this.right = value; } - + + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", left " + new String(left, StandardCharsets.UTF_8) + ", right " + new String(right, StandardCharsets.UTF_8); + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index a72b882..9114932 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -477,6 +477,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + 
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
index a72b882..9114932 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
@@ -477,6 +477,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
index 8b1c366..b56d451 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -152,4 +154,9 @@ public abstract class extends VectorExpression {
     this.value = value;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8);
+  }
+
 }
\ No newline at end of file
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
index 930069c..4fb5035 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -155,4 +157,10 @@ public abstract class extends VectorExpression {
   public void setValue(byte[] value) {
     this.value = value;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "val " + new String(value, StandardCharsets.UTF_8) + ", col " +
+        colNum;
+  }
 }
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
index 4298d79..7863b16 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -154,6 +154,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", left " + leftValue.toString() + ", right " + rightValue.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
" + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 31c3f6b..8583eee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -150,6 +150,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 31dce1c..eeb73c9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -436,6 +436,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index bab8508..23790a5 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -147,6 +147,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt index 5e418de..0e10779 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt @@ -43,6 +43,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index ff5d11e..5a6def3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -149,6 +149,11 @@ public class extends VectorExpression { } 
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
index ff5d11e..5a6def3 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
@@ -149,6 +149,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value.toString() + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt
index 94a174d..781c9b8 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.common.type.;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -178,7 +180,12 @@ public class extends VectorExpression {
   public void setRight(byte[] value) {
     this.right = value;
   }
-
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", left " + new String(left, StandardCharsets.UTF_8) +
+        ", right " + new String(right, StandardCharsets.UTF_8);
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
index 3a75a26..9f4bb75 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
@@ -160,6 +160,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", val " + arg3Scalar;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
index 648b776..487d894 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
@@ -162,6 +162,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
index def9863..5651d15 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
@@ -147,6 +147,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", val " + arg3Scalar;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
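The three IF templates above report three operands: the boolean predicate column first, then the THEN and ELSE operands as columns or scalars depending on the variant. A hypothetical demo of the resulting strings (plain helper methods, not the generated classes):

// Hypothetical demo of the three-operand IF parameter strings.
public class IfExprParamsDemo {
  static String ifExprColumnScalar(int arg1Column, int arg2Column, long arg3Scalar) {
    return "col " + arg1Column + ", col " + arg2Column + ", val " + arg3Scalar;
  }

  static String ifExprScalarScalar(int arg1Column, long arg2Scalar, long arg3Scalar) {
    return "col " + arg1Column + ", val " + arg2Scalar + ", val " + arg3Scalar;
  }

  public static void main(String[] args) {
    System.out.println(ifExprColumnScalar(0, 1, 99)); // col 0, col 1, val 99
    System.out.println(ifExprScalarScalar(0, 1, 2));  // col 0, val 1, val 2
  }
}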
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt
index 8e3a419..49a1950 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt
@@ -180,6 +180,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
index ad65d52..283352d 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
@@ -141,6 +141,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt
index 858c3d7..9eba829 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt
@@ -170,6 +170,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt
index 66fffd2..9a06822 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt
@@ -139,6 +139,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt
index ddde913..a5d9877 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt
@@ -155,6 +155,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value.toString() + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt
index cbb7021..9a0d397 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt
@@ -143,6 +143,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value.toString() + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt
index 9ccfaac..cff2deb 100644
--- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt
@@ -139,6 +139,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " +
+        colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt
index c7d8c65..8308a30 100644
--- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt
@@ -129,6 +129,10 @@ public class extends VectorExpression {
     return "long";
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
 
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt
index d47bc10..6aa30e4 100644
--- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt
@@ -129,6 +129,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
index 4fcbdc0..8473599 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
@@ -146,6 +146,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
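Note the deliberate operand ordering across the templates: column-op-scalar expressions render "col N, val X" while scalar-op-column expressions render "val X, col N", so the parameter list reads in the same order as the SQL expression. A tiny hypothetical demo:

// Hypothetical demo: parameter order mirrors the expression shape.
public class OperandOrderDemo {
  public static void main(String[] args) {
    int colNum = 3;
    long value = 10;
    // ColumnArithmeticScalar-style (e.g. col3 + 10):
    System.out.println("col " + colNum + ", val " + value);
    // ScalarArithmeticColumn-style (e.g. 10 + col3):
    System.out.println("val " + value + ", col " + colNum);
  }
}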
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt
index ea55bec..d3fd9bd 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt
@@ -129,6 +129,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value.toString() + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt
deleted file mode 100644
index 91887c8..0000000
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.*;
-
-
-/*
- * Because of the templatized nature of the code, either or both
- * of these ColumnVector imports may be needed. Listing both of them
- * rather than using ....vectorization.*;
- */
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-
-/**
- * Generated from template ScalarArithmeticColumnWithConvert.txt.
- * Implements a vectorized arithmetic operator with a scalar on the left and a
- * column vector on the right. The result is output to an output column vector.
- */
-public class extends VectorExpression {
-
-  private static final long serialVersionUID = 1L;
-
-  private int colNum;
-  private value;
-  private int outputColumn;
-
-  public ( value, int colNum, int outputColumn) {
-    this.colNum = colNum;
-    this.value = (value);
-    this.outputColumn = outputColumn;
-  }
-
-  public () {
-  }
-
-  @Override
-  /**
-   * Method to evaluate scalar-column operation in vectorized fashion.
-   *
-   * @batch a package of rows with each column stored in a vector
-   */
-  public void evaluate(VectorizedRowBatch batch) {
-
-    if (childExpressions != null) {
-      super.evaluateChildren(batch);
-    }
-
-    inputColVector = () batch.cols[colNum];
-    outputColVector = () batch.cols[outputColumn];
-    int[] sel = batch.selected;
-    boolean[] inputIsNull = inputColVector.isNull;
-    boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
-    [] vector = inputColVector.vector;
-    [] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    if (inputColVector.isRepeating) {
-      outputVector[0] = value (vector[0]);
-
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = value (vector[i]);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = value (vector[i]);
-        }
-      }
-    } else { /* there are nulls */
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = value (vector[i]);
-          outputIsNull[i] = inputIsNull[i];
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = value (vector[i]);
-        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
-      }
-    }
-
-    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return outputColumn;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "";
-  }
-
-  public int getColNum() {
-    return colNum;
-  }
-
-  public void setColNum(int colNum) {
-    this.colNum = colNum;
-  }
-
-  public getValue() {
-    return value;
-  }
-
-  public void setValue( value) {
-    this.value = value;
-  }
-
-  public void setOutputColumn(int outputColumn) {
-    this.outputColumn = outputColumn;
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType(""),
-            VectorExpressionDescriptor.ArgumentType.getType(""))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.SCALAR,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt
index e6e59f5..6f9e2e2 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt
@@ -149,6 +149,11 @@ public class extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " +
+        colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index c8a5d17..1014978 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -131,6 +131,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index e881037..747f707 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -493,6 +493,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index 92bf27a..d9530d6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -155,4 +157,8 @@ public abstract class extends VectorExpression { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8); + } } \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt index 8a92f54..8e36fc0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.common.type.; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -44,6 +46,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", 
val " + new String(value, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index 238dc93..5eed703 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -154,4 +156,9 @@ public abstract class extends VectorExpression { public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + + @Override + public String vectorExpressionParameters() { + return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + colNum; + } } \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index 27e083d..7aeff81 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -171,6 +171,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 8b91a4a..f8cb880 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -132,6 +132,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index 4ac2174..989e2f5 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -170,6 +170,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt 
ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index 9382aca..a90b1b2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -128,6 +128,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 5eaa450..ad43cac 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -161,6 +161,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index c6c872f..32b49a3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -130,6 +130,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 0fc402d..7267148 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -138,6 +138,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index e0ae206..2be05f3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -129,6 +129,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git 
ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index f9fc425..2710fa4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -143,6 +143,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 90701ec..32647f2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -133,6 +133,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value.toString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index f958be8..dea4db2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -153,6 +153,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 585027a..e82b9e2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -152,6 +152,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 996c86a..0d8a26b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -143,6 +143,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return 
(new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6506c93..ec0a395 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -135,6 +135,11 @@ public class extends VectorExpression { } @Override + public String vectorExpressionParameters() { + return "val " + value.toString() + ", col " + + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt index a9a3b6d..26da73a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.common.type.; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt index d153fd6..4393c3b 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt @@ -85,6 +85,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private DoubleWritable resultSum; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt index 46d66bd..7468c2f 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt @@ -77,6 +77,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index 9a48171..6b91fc2 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -83,6 +83,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 3cdf7e2..749e97e 100644 --- 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -81,6 +81,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index cdce457..9dfc147 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -93,6 +93,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Text result; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 7e34965..32ecb34 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -83,6 +83,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + private transient VectorExpressionWriter resultWriter; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index cc7e54d..bd0f14d 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -78,6 +78,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final result; public (VectorExpression inputExpression) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index c6c9c52..dc9d4b1 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -84,6 +84,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index 8fc94ba..01062a9 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -111,6 +111,12 @@ public class extends VectorAggregateExpression { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index b8a4693..74cec3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -20,6 +20,8 @@ import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; +import org.apache.commons.lang3.tuple.ImmutablePair; + import java.io.OutputStream; import java.io.PrintStream; import java.io.Serializable; @@ -35,30 +37,68 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Stack; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.jsonexplain.JsonParser; import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.Validator.StringSet; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.exec.spark.SparkTask; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExplainWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SparkWork; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; +import 
org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.AnnotationUtils; @@ -157,6 +197,54 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E return outJSONObject; } + private static String trueCondNameVectorizationEnabled = + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS true"; + private static String falseCondNameVectorizationEnabled = + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS false"; + + private ImmutablePair outputPlanVectorization(PrintStream out, boolean jsonOutput) + throws Exception { + + if (out != null) { + out.println("PLAN VECTORIZATION:"); + } + + JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null; + + HiveConf hiveConf = queryState.getConf(); + + boolean isVectorizationEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); + String isVectorizationEnabledCondName = + (isVectorizationEnabled ? + trueCondNameVectorizationEnabled : + falseCondNameVectorizationEnabled); + List isVectorizationEnabledCondList = Arrays.asList(isVectorizationEnabledCondName); + + if (out != null) { + out.print(indentString(2)); + out.print("enabled: "); + out.println(isVectorizationEnabled); + out.print(indentString(2)); + if (!isVectorizationEnabled) { + out.print("enabledConditionsNotMet: "); + } else { + out.print("enabledConditionsMet: "); + } + out.println(isVectorizationEnabledCondList); + } + if (jsonOutput) { + json.put("enabled", isVectorizationEnabled); + if (!isVectorizationEnabled) { + json.put("enabledConditionsNotMet", isVectorizationEnabledCondList); + } else { + json.put("enabledConditionsMet", isVectorizationEnabledCondList); + } + } + + return new ImmutablePair(isVectorizationEnabled, jsonOutput ? json : null); + } + public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception { return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(), @@ -184,26 +272,46 @@ public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetc ordered.add(fetchTask); } - JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered); + boolean suppressOthersForVectorization = false; + if (this.work != null && this.work.isVectorization()) { + ImmutablePair planVecPair = outputPlanVectorization(out, jsonOutput); + + if (this.work.isVectorizationOnly()) { + // Suppress the STAGES if vectorization is off. 
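For a sense of what outputPlanVectorization() renders, here is a standalone sketch of the same text layout, with the HiveConf lookup replaced by a plain boolean and indentString(2) approximated by two literal spaces; it is illustrative only, not the Hive code path:

    import java.util.Arrays;
    import java.util.List;

    public class PlanVectorizationPrintSketch {
      public static void main(String[] args) {
        // Stand-in for HiveConf.getBoolVar(conf, HIVE_VECTORIZATION_ENABLED).
        boolean enabled = true;
        List<String> conds = Arrays.asList(
            "hive.vectorized.execution.enabled IS " + enabled);

        System.out.println("PLAN VECTORIZATION:");
        System.out.print("  enabled: ");
        System.out.println(enabled);
        System.out.print(enabled ? "  enabledConditionsMet: " : "  enabledConditionsNotMet: ");
        System.out.println(conds);
        // Expected output:
        // PLAN VECTORIZATION:
        //   enabled: true
        //   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
      }
    }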
@@ -184,26 +272,46 @@ public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetc
       ordered.add(fetchTask);
     }
 
-    JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
+    boolean suppressOthersForVectorization = false;
+    if (this.work != null && this.work.isVectorization()) {
+      ImmutablePair planVecPair = outputPlanVectorization(out, jsonOutput);
+
+      if (this.work.isVectorizationOnly()) {
+        // Suppress the STAGES if vectorization is off.
+        suppressOthersForVectorization = !planVecPair.left;
+      }
 
-    if (out != null) {
-      out.println();
+      if (out != null) {
+        out.println();
+      }
+
+      if (jsonOutput) {
+        outJSONObject.put("PLAN VECTORIZATION", planVecPair.right);
+      }
     }
 
-    if (jsonOutput) {
-      outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies);
-    }
+    if (!suppressOthersForVectorization) {
+      JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
 
-    // Go over all the tasks and dump out the plans
-    JSONObject jsonPlan = outputStagePlans(out, ordered,
-        jsonOutput, isExtended);
+      if (out != null) {
+        out.println();
+      }
 
-    if (jsonOutput) {
-      outJSONObject.put("STAGE PLANS", jsonPlan);
-    }
+      if (jsonOutput) {
+        outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies);
+      }
 
-    if (fetchTask != null) {
-      fetchTask.setParentTasks(null);
+      // Go over all the tasks and dump out the plans
+      JSONObject jsonPlan = outputStagePlans(out, ordered,
+          jsonOutput, isExtended);
+
+      if (jsonOutput) {
+        outJSONObject.put("STAGE PLANS", jsonPlan);
+      }
+
+      if (fetchTask != null) {
+        fetchTask.setParentTasks(null);
+      }
     }
 
     return jsonOutput ? outJSONObject : null;
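The suppression rule added to getJSONPlan() reduces to a single boolean predicate. A tiny self-contained sketch, under illustrative names (suppressStages is not a Hive method):

    public class ExplainSuppressionSketch {
      // Mirrors: under EXPLAIN VECTORIZATION ONLY, stage dependencies/plans are
      // emitted only when vectorization is enabled (!planVecPair.left otherwise).
      static boolean suppressStages(boolean vectorizationOnly, boolean vectorizationEnabled) {
        return vectorizationOnly && !vectorizationEnabled;
      }

      public static void main(String[] args) {
        System.out.println(suppressStages(true, false));   // true: only PLAN VECTORIZATION prints
        System.out.println(suppressStages(true, true));    // false: stages are shown too
        System.out.println(suppressStages(false, false));  // false: plain EXPLAIN path
      }
    }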
@@ -602,6 +710,64 @@ private JSONObject outputPlan(Object work, PrintStream out,
         }
       }
       if (invokeFlag) {
+        Vectorization vectorization = xpl_note.vectorization();
+        if (this.work != null && this.work.isVectorization()) {
+
+          // The EXPLAIN VECTORIZATION option was specified.
+          final boolean desireOnly = this.work.isVectorizationOnly();
+          final VectorizationDetailLevel desiredVecDetailLevel =
+              this.work.isVectorizationDetailLevel();
+
+          switch (vectorization) {
+          case NON_VECTORIZED:
+            // Display all non-vectorized leaf objects unless ONLY.
+            if (desireOnly) {
+              invokeFlag = false;
+            }
+            break;
+          case SUMMARY:
+          case OPERATOR:
+          case EXPRESSION:
+          case DETAIL:
+            if (vectorization.rank < desiredVecDetailLevel.rank) {
+              // This detail not desired.
+              invokeFlag = false;
+            }
+            break;
+          case SUMMARY_PATH:
+          case OPERATOR_PATH:
+            if (desireOnly) {
+              if (vectorization.rank < desiredVecDetailLevel.rank) {
+                // Suppress headers and all objects below.
+                invokeFlag = false;
+              }
+            }
+            break;
+          default:
+            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
+          }
+        } else {
+          // Do not display vectorization objects.
+          switch (vectorization) {
+          case SUMMARY:
+          case OPERATOR:
+          case EXPRESSION:
+          case DETAIL:
+            invokeFlag = false;
+            break;
+          case NON_VECTORIZED:
+            // No action.
+            break;
+          case SUMMARY_PATH:
+          case OPERATOR_PATH:
+            // Always include headers since they contain non-vectorized objects, too.
+            break;
+          default:
+            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
+          }
+        }
+      }
+      if (invokeFlag) {
         keyJSONObject = xpl_note.displayName();
         if (out != null) {
           out.print(indentString(indent));
@@ -675,6 +841,64 @@ private JSONObject outputPlan(Object work, PrintStream out,
         }
       }
       if (invokeFlag) {
+        Vectorization vectorization = xpl_note.vectorization();
+        if (this.work != null && this.work.isVectorization()) {
+
+          // The EXPLAIN VECTORIZATION option was specified.
+          final boolean desireOnly = this.work.isVectorizationOnly();
+          final VectorizationDetailLevel desiredVecDetailLevel =
+              this.work.isVectorizationDetailLevel();
+
+          switch (vectorization) {
+          case NON_VECTORIZED:
+            // Display all non-vectorized leaf objects unless ONLY.
+            if (desireOnly) {
+              invokeFlag = false;
+            }
+            break;
+          case SUMMARY:
+          case OPERATOR:
+          case EXPRESSION:
+          case DETAIL:
+            if (vectorization.rank < desiredVecDetailLevel.rank) {
+              // This detail not desired.
+              invokeFlag = false;
+            }
+            break;
+          case SUMMARY_PATH:
+          case OPERATOR_PATH:
+            if (desireOnly) {
+              if (vectorization.rank < desiredVecDetailLevel.rank) {
+                // Suppress headers and all objects below.
+                invokeFlag = false;
+              }
+            }
+            break;
+          default:
+            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
+          }
+        } else {
+          // Do not display vectorization objects.
+          switch (vectorization) {
+          case SUMMARY:
+          case OPERATOR:
+          case EXPRESSION:
+          case DETAIL:
+            invokeFlag = false;
+            break;
+          case NON_VECTORIZED:
+            // No action.
+            break;
+          case SUMMARY_PATH:
+          case OPERATOR_PATH:
+            // Always include headers since they contain non-vectorized objects, too.
+            break;
+          default:
+            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
+          }
+        }
+      }
+      if (invokeFlag) {
         Object val = null;
         try {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 416606e..c070c4a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -93,7 +93,7 @@
   protected transient ReusableGetAdaptor[] hashMapRowGetters;
 
   private UnwrapRowContainer[] unwrapContainer;
-  private transient Configuration hconf;
+  protected transient Configuration hconf;
   private transient boolean hybridMapJoinLeftover;  // whether there's spilled data to be processed
   protected transient MapJoinBytesTableContainer[] spilledMapJoinTables;  // used to hold restored
                                                                           // spilled small tables
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
index 038b96c..af1fa66 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
@@ -41,6 +41,8 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc;
 import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
+import org.apache.hadoop.hive.ql.plan.AbstractVectorDesc;
 import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
 import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
@@ -73,6 +75,7 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
 
 import com.google.common.base.Preconditions;
 
@@ -142,6 +145,8 @@ Class> opClass, CompilationOpContext cContext, T conf,
       VectorizationContext vContext) throws HiveException {
     try {
+      VectorDesc vectorDesc = ((AbstractOperatorDesc) conf).getVectorDesc();
+      vectorDesc.setVectorOp(opClass);
       Operator op = (Operator) opClass.getDeclaredConstructor(
           CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class)
           .newInstance(cContext, vContext, conf);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 9049ddd..42c7d36 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -201,5 +201,4 @@ public boolean isIdentitySelect() {
 
     return true;
   }
-
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
index 9f27f56..cbe83be 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
@@ -93,9 +93,7 @@ public static boolean isSupportedField(ObjectInspector foi) {
     return true;
   }
 
-  public static boolean isSupportedField(String typeName) {
-    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
-
+  public static boolean isSupportedField(TypeInfo typeInfo) {
     if (typeInfo.getCategory() != Category.PRIMITIVE) return false; // not supported
     PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
     PrimitiveCategory pc = primitiveTypeInfo.getPrimitiveCategory();
@@ -103,6 +101,11 @@ public static boolean isSupportedField(String typeName) {
     return true;
   }
 
+  public static boolean isSupportedField(String typeName) {
+    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+    return isSupportedField(typeInfo);
+  }
+
   public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject,
       List keyOIs, boolean mayReuseKey) throws HiveException {
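The isSupportedField change above is a standard overload-delegation refactor: the String overload parses its argument once and forwards to the typed overload, so the actual support check lives in one place. A self-contained illustration of the pattern; FakeTypeInfo and its toy parsing are stand-ins, not the serde2 API:

    public class OverloadDelegationSketch {
      enum Kind { PRIMITIVE, STRUCT }

      static final class FakeTypeInfo {  // stand-in for serde2 TypeInfo
        final Kind kind;
        FakeTypeInfo(Kind kind) { this.kind = kind; }
      }

      // Single source of truth for the check.
      static boolean isSupportedField(FakeTypeInfo typeInfo) {
        return typeInfo.kind == Kind.PRIMITIVE;
      }

      // Parse, then delegate instead of duplicating the logic.
      static boolean isSupportedField(String typeName) {
        FakeTypeInfo parsed = new FakeTypeInfo(
            typeName.equals("int") || typeName.equals("string") ? Kind.PRIMITIVE : Kind.STRUCT);
        return isSupportedField(parsed);
      }

      public static void main(String[] args) {
        System.out.println(isSupportedField("int"));            // true
        System.out.println(isSupportedField("struct<a:int>"));  // false
      }
    }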
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
index 1634f42..3cf6561 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
@@ -75,7 +75,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati
     this.desc = joinOp.getConf();
     if (desc.getVectorMode() && HiveConf.getBoolVar(
         hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
-      VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
+      VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
       useFastContainer = vectorDesc != null && vectorDesc.hashTableImplementationType() ==
           VectorMapJoinDesc.HashTableImplementationType.FAST;
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
index c4b95c3..c890674 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
@@ -20,6 +20,8 @@
 
 import java.util.Arrays;
 
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
  * same/another batch.
@@ -30,7 +32,7 @@
   protected int[] sourceColumns;
   protected int[] outputColumns;
-  protected String[] typeNames;
+  protected TypeInfo[] typeInfos;
 
   protected VectorColumnOrderedMap vectorColumnMapping;
 
@@ -38,7 +40,7 @@ public VectorColumnMapping(String name) {
     this.vectorColumnMapping = new VectorColumnOrderedMap(name);
   }
 
-  public abstract void add(int sourceColumn, int outputColumn, String typeName);
+  public abstract void add(int sourceColumn, int outputColumn, TypeInfo typeInfo);
 
   public abstract void finalize();
 
@@ -54,8 +56,8 @@ public int getCount() {
     return outputColumns;
   }
 
-  public String[] getTypeNames() {
-    return typeNames;
+  public TypeInfo[] getTypeInfos() {
+    return typeInfos;
   }
 
   @Override
@@ -65,7 +67,7 @@ public String toString() {
     sb.append(", ");
     sb.append("output columns: " + Arrays.toString(outputColumns));
     sb.append(", ");
-    sb.append("type names: " + Arrays.toString(typeNames));
+    sb.append("type infos: " + Arrays.toString(typeInfos));
     return sb.toString();
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
index 0e6014b..97d55f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
@@ -23,8 +23,10 @@
 import java.util.TreeMap;
 
 import org.apache.commons.lang.ArrayUtils;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for mapping vector columns, including the hive type name.
@@ -43,17 +45,17 @@
   private class Value {
     int valueColumn;
-    String typeName;
+    TypeInfo typeInfo;
 
-    Value(int valueColumn, String typeName) {
+    Value(int valueColumn, TypeInfo typeInfo) {
       this.valueColumn = valueColumn;
-      this.typeName = typeName;
+      this.typeInfo = typeInfo;
     }
 
     public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append("(value column: " + valueColumn);
-      sb.append(", type name: " + typeName + ")");
+      sb.append(", type info: " + typeInfo.toString() + ")");
       return sb.toString();
     }
   }
@@ -62,12 +64,12 @@ public String toString() {
 
     private final int[] orderedColumns;
     private final int[] valueColumns;
-    private final String[] typeNames;
+    private final TypeInfo[] typeInfos;
 
-    Mapping(int[] orderedColumns, int[] valueColumns, String[] typeNames) {
+    Mapping(int[] orderedColumns, int[] valueColumns, TypeInfo[] typeInfos) {
       this.orderedColumns = orderedColumns;
       this.valueColumns = valueColumns;
-      this.typeNames = typeNames;
+      this.typeInfos = typeInfos;
     }
 
     public int getCount() {
@@ -82,8 +84,8 @@ public int getCount() {
       return valueColumns;
     }
 
-    public String[] getTypeNames() {
-      return typeNames;
+    public TypeInfo[] getTypeInfos() {
+      return typeInfos;
     }
   }
 
@@ -92,14 +94,14 @@ public VectorColumnOrderedMap(String name) {
     orderedTreeMap = new TreeMap();
   }
 
-  public void add(int orderedColumn, int valueColumn, String typeName) {
+  public void add(int orderedColumn, int valueColumn, TypeInfo typeInfo) {
     if (orderedTreeMap.containsKey(orderedColumn)) {
       throw new RuntimeException(
           name + " duplicate column " + orderedColumn +
           " in ordered column map " + orderedTreeMap.toString() +
-          " when adding value column " + valueColumn + ", type " + typeName);
+          " when adding value column " + valueColumn + ", type info " + typeInfo.toString());
     }
-    orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName));
+    orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeInfo));
   }
 
   public boolean orderedColumnsContain(int orderedColumn) {
@@ -109,17 +111,16 @@ public boolean orderedColumnsContain(int orderedColumn) {
   public Mapping getMapping() {
     ArrayList orderedColumns = new ArrayList();
     ArrayList valueColumns = new ArrayList();
-    ArrayList typeNames = new ArrayList();
+    ArrayList typeInfos = new ArrayList();
     for (Map.Entry entry : orderedTreeMap.entrySet()) {
       orderedColumns.add(entry.getKey());
       Value value = entry.getValue();
       valueColumns.add(value.valueColumn);
-      typeNames.add(value.typeName);
+      typeInfos.add(value.typeInfo);
     }
     return new Mapping(
         ArrayUtils.toPrimitive(orderedColumns.toArray(new Integer[0])),
         ArrayUtils.toPrimitive(valueColumns.toArray(new Integer[0])),
-        typeNames.toArray(new String[0]));
-
+        typeInfos.toArray(new TypeInfo[0]));
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
index f35aff7..4ceff6b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
@@ -35,9 +36,9 @@ public VectorColumnOutputMapping(String name) {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
     // Order on outputColumn.
-    vectorColumnMapping.add(outputColumn, sourceColumn, typeName);
+    vectorColumnMapping.add(outputColumn, sourceColumn, typeInfo);
   }
 
   public boolean containsOutputColumn(int outputColumn) {
@@ -51,7 +52,7 @@ public void finalize() {
     // Ordered columns are the output columns.
     sourceColumns = mapping.getValueColumns();
     outputColumns = mapping.getOrderedColumns();
-    typeNames = mapping.getTypeNames();
+    typeInfos = mapping.getTypeInfos();
 
     // Not needed anymore.
     vectorColumnMapping = null;
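The VectorColumnOrderedMap being reworked above is, at its core, a TreeMap keyed by the "ordered" column that is later flattened into parallel arrays. A generic, runnable sketch of that structure, with TypeInfo simplified to a type-name String and all names illustrative:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    public class OrderedColumnMapSketch {

      // (value column, type) pair; the ordered column is the TreeMap key.
      private static final class Value {
        final int valueColumn;
        final String typeName;
        Value(int valueColumn, String typeName) {
          this.valueColumn = valueColumn;
          this.typeName = typeName;
        }
      }

      // TreeMap keeps entries sorted by the ordered column.
      private final TreeMap<Integer, Value> orderedTreeMap = new TreeMap<>();

      void add(int orderedColumn, int valueColumn, String typeName) {
        if (orderedTreeMap.containsKey(orderedColumn)) {
          throw new IllegalStateException("duplicate ordered column " + orderedColumn);
        }
        orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName));
      }

      // Walk in ascending ordered-column order, the way getMapping() builds arrays.
      List<String> describeMapping() {
        List<String> rows = new ArrayList<>();
        for (Map.Entry<Integer, Value> e : orderedTreeMap.entrySet()) {
          rows.add(e.getKey() + " <- " + e.getValue().valueColumn
              + " (" + e.getValue().typeName + ")");
        }
        return rows;
      }

      public static void main(String[] args) {
        OrderedColumnMapSketch m = new OrderedColumnMapSketch();
        m.add(3, 0, "int");
        m.add(1, 7, "string");
        // Prints [1 <- 7 (string), 3 <- 0 (int)]: sorted by ordered column.
        System.out.println(m.describeMapping());
      }
    }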
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
index 4f5ba9a..061e396 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
@@ -35,9 +36,9 @@ public VectorColumnSourceMapping(String name) {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
     // Order on sourceColumn.
-    vectorColumnMapping.add(sourceColumn, outputColumn, typeName);
+    vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo);
   }
 
   @Override
@@ -47,7 +48,7 @@ public void finalize() {
     // Ordered columns are the source columns.
     sourceColumns = mapping.getOrderedColumns();
     outputColumns = mapping.getValueColumns();
-    typeNames = mapping.getTypeNames();
+    typeInfos = mapping.getTypeInfos();
 
     // Not needed anymore.
     vectorColumnMapping = null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c8e0284..911aeb0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -262,8 +262,7 @@ public void init(VectorColumnMapping columnMapping) throws HiveException {
     for (int i = 0; i < count; i++) {
       int inputColumn = columnMapping.getInputColumns()[i];
       int outputColumn = columnMapping.getOutputColumns()[i];
-      String typeName = columnMapping.getTypeNames()[i].toLowerCase();
-      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
       Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
       CopyRow copyRowByValue = null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 261246b..bfe22b0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -50,9 +51,8 @@ public VectorFilterOperator(CompilationOpContext ctx,
       VectorizationContext vContext, OperatorDesc conf) throws HiveException {
     this(ctx);
-    ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate();
-    conditionEvaluator = vContext.getVectorExpression(oldExpression, VectorExpressionDescriptor.Mode.FILTER);
     this.conf = (FilterDesc) conf;
+    conditionEvaluator = ((VectorFilterDesc) this.conf.getVectorDesc()).getPredicateExpression();
   }
 
   /** Kryo ctor. */
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 2605203..fef7c2a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -41,6 +41,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
@@ -65,6 +66,8 @@
   private static final Logger LOG = LoggerFactory.getLogger(
       VectorGroupByOperator.class.getName());
 
+  private VectorGroupByDesc vectorDesc;
+
   /**
    * This is the vector of aggregators. They are stateless and only implement
    * the algorithm of how to compute the aggregation. state is kept in the
@@ -756,16 +759,10 @@ public VectorGroupByOperator(CompilationOpContext ctx,
     this(ctx);
     GroupByDesc desc = (GroupByDesc) conf;
     this.conf = desc;
-    List keysDesc = desc.getKeys();
-    keyExpressions = vContext.getVectorExpressions(keysDesc);
-    ArrayList aggrDesc = desc.getAggregators();
-    aggregators = new VectorAggregateExpression[aggrDesc.size()];
-    for (int i = 0; i < aggrDesc.size(); ++i) {
-      AggregationDesc aggDesc = aggrDesc.get(i);
-      aggregators[i] = vContext.getAggregatorExpression(aggDesc);
-    }
-
-    isVectorOutput = desc.getVectorDesc().isVectorOutput();
+    vectorDesc = (VectorGroupByDesc) desc.getVectorDesc();
+    keyExpressions = vectorDesc.getKeyExpressions();
+    aggregators = vectorDesc.getAggregators();
+    isVectorOutput = vectorDesc.isVectorOutput();
 
     vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
         /* vContextEnvironment */ vContext);
@@ -834,7 +831,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
 
     forwardCache = new Object[outputKeyLength + aggregators.length];
 
-    switch (conf.getVectorDesc().getProcessingMode()) {
+    switch (vectorDesc.getProcessingMode()) {
     case GLOBAL:
       Preconditions.checkState(outputKeyLength == 0);
       processingMode = this.new ProcessingModeGlobalAggregate();
@@ -850,7 +847,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       break;
     default:
       throw new RuntimeException("Unsupported vector GROUP BY processing mode " +
-          conf.getVectorDesc().getProcessingMode().name());
+          vectorDesc.getProcessingMode().name());
     }
     processingMode.initialize(hconf);
   }
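The operator constructors above switch from building vector expressions at operator-construction time to reading expressions the planner already built and stored on the operator's VectorDesc. A sketch of that hand-off pattern, with all class names illustrative (these are stubs, not Hive's desc or operator classes):

    public class DescCarriedExpressionsSketch {
      static class VectorExprStub {
        final String name;
        VectorExprStub(String name) { this.name = name; }
      }

      // Stand-in for VectorGroupByDesc: populated once at plan time.
      static class GroupByVectorDesc {
        VectorExprStub[] keyExpressions;
        VectorExprStub[] aggregators;
      }

      // Stand-in operator: the constructor only retrieves, never rebuilds.
      static class GroupByOperatorStub {
        final VectorExprStub[] keyExpressions;
        final VectorExprStub[] aggregators;
        GroupByOperatorStub(GroupByVectorDesc vectorDesc) {
          this.keyExpressions = vectorDesc.keyExpressions;
          this.aggregators = vectorDesc.aggregators;
        }
      }

      public static void main(String[] args) {
        GroupByVectorDesc desc = new GroupByVectorDesc();        // "planning"
        desc.keyExpressions = new VectorExprStub[] { new VectorExprStub("col 0") };
        desc.aggregators = new VectorExprStub[] { new VectorExprStub("sum(col 1)") };
        GroupByOperatorStub op = new GroupByOperatorStub(desc);  // "execution"
        System.out.println(op.keyExpressions.length + " key(s), "
            + op.aggregators.length + " aggregate(s)");
      }
    }

Keeping the expressions on the desc object is also what allows EXPLAIN VECTORIZATION to report exactly what will run, without re-deriving anything at explain time.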
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index f7fec8f..6ea6122 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -45,9 +46,11 @@
 
   private static final long serialVersionUID = 1L;
 
+  private VectorSelectDesc vectorDesc;
+
   protected VectorExpression[] vExpressions = null;
 
-  private transient int [] projectedColumns = null;
+  private int [] projectedOutputColumns = null;
 
   private transient VectorExpressionWriter [] valueWriters = null;
 
@@ -58,13 +61,9 @@ public VectorSelectOperator(CompilationOpContext ctx,
       VectorizationContext vContext, OperatorDesc conf) throws HiveException {
     this(ctx);
     this.conf = (SelectDesc) conf;
-    List colList = this.conf.getColList();
-    vExpressions = new VectorExpression[colList.size()];
-    for (int i = 0; i < colList.size(); i++) {
-      ExprNodeDesc expr = colList.get(i);
-      VectorExpression ve = vContext.getVectorExpression(expr);
-      vExpressions[i] = ve;
-    }
+    vectorDesc = (VectorSelectDesc) this.conf.getVectorDesc();
+    vExpressions = vectorDesc.getSelectExpressions();
+    projectedOutputColumns = vectorDesc.getProjectedOutputColumns();
 
     /**
      * Create a new vectorization context to create a new projection, but keep
@@ -73,11 +72,10 @@ public VectorSelectOperator(CompilationOpContext ctx,
     vOutContext = new VectorizationContext(getName(), vContext);
     vOutContext.resetProjectionColumns();
-    for (int i=0; i < colList.size(); ++i) {
-      String columnName = this.conf.getOutputColumnNames().get(i);
-      VectorExpression ve = vExpressions[i];
-      vOutContext.addProjectionColumn(columnName,
-          ve.getOutputColumn());
+    List outputColumnNames = this.conf.getOutputColumnNames();
+    for (int i=0; i < projectedOutputColumns.length; ++i) {
+      String columnName = outputColumnNames.get(i);
+      vOutContext.addProjectionColumn(columnName, projectedOutputColumns[i]);
     }
   }
 
@@ -110,11 +108,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     List outputFieldNames = conf.getOutputColumnNames();
     outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
         outputFieldNames, objectInspectors);
-
-    projectedColumns = new int [vExpressions.length];
-    for (int i = 0; i < projectedColumns.length; i++) {
-      projectedColumns[i] = vExpressions[i].getOutputColumn();
-    }
   }
 
   @Override
@@ -139,8 +132,8 @@ public void process(Object row, int tag) throws HiveException {
     // Prepare output, set the projections
     int[] originalProjections = vrg.projectedColumns;
     int originalProjectionSize = vrg.projectionSize;
-    vrg.projectionSize = vExpressions.length;
-    vrg.projectedColumns = this.projectedColumns;
+    vrg.projectionSize = projectedOutputColumns.length;
+    vrg.projectedColumns = this.projectedOutputColumns;
     forward(vrg, outputObjInspector);
 
     // Revert the projected columns back, because vrg will be re-used.
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index f088941..55881fb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -92,6 +92,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -560,7 +561,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress
         } else {
           throw new HiveException(
               "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
-                + " because hive.vectorized.adaptor.usage.mode=chosen "
+                + " because hive.vectorized.adaptor.usage.mode=chosen"
                 + " and the UDF wasn't one of the chosen ones");
         }
         break;
@@ -1262,6 +1263,35 @@ private String getNewInstanceArgumentString(Object [] args) {
     return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString();
   }
 
+  private static int STACK_LENGTH_LIMIT = 15;
+
+  public static String getStackTraceAsSingleLine(Throwable e) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(e);
+    sb.append(" stack trace: ");
+    StackTraceElement[] stackTrace = e.getStackTrace();
+    int length = stackTrace.length;
+    boolean isTruncated = false;
+    if (length > STACK_LENGTH_LIMIT) {
+      length = STACK_LENGTH_LIMIT;
+      isTruncated = true;
+    }
+    for (int i = 0; i < length; i++) {
+      if (i > 0) {
+        sb.append(", ");
+      }
+      sb.append(stackTrace[i]);
+    }
+    if (isTruncated) {
+      sb.append(", ...");
+    }
+
+    // Attempt to cleanup stack trace elements that vary by VM.
+    String cleaned = sb.toString().replaceAll("GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor");
+
+    return cleaned;
+  }
+
   private VectorExpression instantiateExpression(Class vclass, TypeInfo returnType, Object...args)
       throws HiveException {
     VectorExpression ve = null;
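getStackTraceAsSingleLine() added above can be exercised standalone. This runnable demo mirrors its logic (limit of 15 frames, comma-joined, ", ..." appended on truncation, VM-generated accessor names normalized so error strings are stable across JVMs); the class and method names here are the demo's own:

    public class SingleLineStackTraceDemo {
      private static final int STACK_LENGTH_LIMIT = 15;

      static String asSingleLine(Throwable e) {
        StringBuilder sb = new StringBuilder();
        sb.append(e).append(" stack trace: ");
        StackTraceElement[] frames = e.getStackTrace();
        int length = Math.min(frames.length, STACK_LENGTH_LIMIT);
        for (int i = 0; i < length; i++) {
          if (i > 0) {
            sb.append(", ");
          }
          sb.append(frames[i]);
        }
        if (frames.length > STACK_LENGTH_LIMIT) {
          sb.append(", ...");
        }
        // Normalize names like GeneratedConstructorAccessor42 that vary by VM.
        return sb.toString().replaceAll(
            "GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor");
      }

      public static void main(String[] args) {
        try {
          throw new IllegalStateException("could not instantiate expression");
        } catch (IllegalStateException e) {
          // One line, suitable for embedding in an exception message.
          System.out.println(asSingleLine(e));
        }
      }
    }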
+ String cleaned = sb.toString().replaceAll("GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor"); + + return cleaned; + } + private VectorExpression instantiateExpression(Class vclass, TypeInfo returnType, Object...args) throws HiveException { VectorExpression ve = null; @@ -1273,14 +1303,14 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve = (VectorExpression) ctor.newInstance(); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength + 1) { // Additional argument is needed, which is the outputcolumn. @@ -1306,7 +1336,7 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve.setOutputType(outType); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + - StringUtils.stringifyException(ex)); + getStackTraceAsSingleLine(ex)); } } // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java index a403725..914bb1f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java @@ -20,10 +20,10 @@ /** * VectorizationContextRegion optional interface implemented by vectorized operators - * that are changing the vectorizaiton context (region boundary operators) + * that are changing the vectorization context (region boundary operators) */ public interface VectorizationContextRegion { VectorizationContext getOuputVectorizationContext(); -} +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 3e3844e..e546a65 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -38,7 +38,10 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java index c50af8d..b49ff39 100644 --- 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
index a403725..914bb1f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
@@ -20,10 +20,10 @@
 
 /**
  * VectorizationContextRegion optional interface implemented by vectorized operators
- * that are changing the vectorizaiton context (region boundary operators)
+ * that are changing the vectorization context (region boundary operators)
  */
 public interface VectorizationContextRegion {
 
   VectorizationContext getOuputVectorizationContext();
-}
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 3e3844e..e546a65 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -38,7 +38,10 @@
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.IOPrepareCache;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
index c50af8d..b49ff39 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
@@ -518,6 +518,11 @@ public void setPattern(String pattern) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", pattern " + pattern;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java
index 57dc92b..96c08af 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java
index 1f7697e..a120f2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java
index 187f12b..447e258 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java
index 5ad745c..98c1f93 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java
index aab3e70..5ab22ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java
@@ -55,4 +55,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
index ea235d9..e1debcd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
@@ -148,6 +148,11 @@ public void setInputColumn(int inputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java
index 267b0b1..b6a4f73 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java
@@ -55,4 +55,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
index 07f94f5..e38e32b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
@@ -114,6 +114,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java
index 27674c4..eac45e4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
index ceefd61..86e0959 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
@@ -94,6 +94,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
index 4de95a5..9f71b9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
@@ -111,6 +111,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java
index 7c3dca2..9bc1cdb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java
@@ -51,4 +51,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
index b1c6b2d..4cc120a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
@@ -112,6 +112,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java
index 7c06ff5..3469183 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java
@@ -52,4 +52,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java
index 376ce92..fd4c76a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java
@@ -52,4 +52,9 @@ public int getMaxLength() {
   public void setMaxLength(int maxLength) {
     this.maxLength = maxLength;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn + ", maxLength " + maxLength;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index e456b12..4b176ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -149,6 +149,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
index 504b354..074f9aa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
@@ -154,6 +154,11 @@ public void setInputColumn(int inputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
index c8844c8..e577628 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
@@ -148,6 +148,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
index 62f3dc9..21b034a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
@@ -146,6 +146,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
index b8a58cd..0e23bfb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
@@ -21,8 +21,6 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
 import org.apache.hadoop.hive.ql.exec.vector.*;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 
 public class CastTimestampToBoolean extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -125,6 +123,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
index a955d79..92595d9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
@@ -118,6 +118,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
index ba2e823..466043e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
@@ -21,8 +21,6 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
 import org.apache.hadoop.hive.ql.exec.vector.*;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 
 public class CastTimestampToLong extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -120,6 +118,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
index ff7371d..42f9b60 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
@@ -314,6 +314,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
index 60ed2d4..297c372 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
@@ -317,6 +317,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index 72749b7..487c4b0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.nio.charset.StandardCharsets;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -268,6 +269,38 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    String value;
+    if (isNullValue) {
+      value = "null";
+    } else {
+      switch (type) {
+      case LONG:
+        value = Long.toString(longValue);
+        break;
+      case DOUBLE:
+        value = Double.toString(doubleValue);
+        break;
+      case BYTES:
+        value = new String(bytesValue, StandardCharsets.UTF_8);
+        break;
+      case DECIMAL:
+        value = decimalValue.toString();
+        break;
+      case TIMESTAMP:
+        value = timestampValue.toString();
+        break;
+      case INTERVAL_DAY_TIME:
+        value = intervalDayTimeValue.toString();
+        break;
+      default:
+        throw new RuntimeException("Unknown vector column type " + type);
+      }
+    }
+    return "val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder()).build();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
index fafacce..e04280f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
@@ -168,6 +168,11 @@ public String getOutputType() {
     return "timestamp";
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
index a9ca93c..bce24ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
@@ -132,6 +132,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
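Every VectorExpression touched by this patch now reports its operands through vectorExpressionParameters(), with a consistent convention: "col <n>" for a column operand and "val <v>" for a scalar, listed in constructor-argument order (compare DateColSubtractDateScalar and DateScalarSubtractDateColumn). The consumer of this string is not shown in these hunks; the following is only a hypothetical sketch of how a tool might combine it with the class name to render an EXPLAIN-style description:

// Hypothetical helper (not part of the patch).
static String describe(VectorExpression ve) {
  String params = ve.vectorExpressionParameters();
  // e.g. "LongColEqualLongScalar(col 2, val 10)"; expressions that return
  // null parameters contribute just their class name.
  return ve.getClass().getSimpleName() + (params == null ? "" : "(" + params + ")");
}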
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
index 59cf9da..62f29f1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
@@ -135,6 +135,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
index 0601c66..9873303 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 
+import java.util.Arrays;
 import java.util.HashSet;
 
 /**
@@ -152,4 +153,10 @@ public Descriptor getDescriptor() {
   public void setInListValues(HiveDecimal[] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
index 4b1182c..a9e1f8b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
@@ -134,6 +134,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
index dfc1aff..db65460 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -156,6 +158,11 @@ public void setInListValues(double[] a) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", values " + Arrays.toString(inListValues);
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 
     // return null since this will be handled as a special case in VectorizationContext
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java
index bef1c18..578feb0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColAndScalar.java
@@ -73,6 +73,11 @@ public void setValue(long value) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java
index ee0ac69..72f58b1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterColOrScalar.java
@@ -73,6 +73,11 @@ public void setValue(long value) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java
index a865343..9ebc5ac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 
+import java.util.Arrays;
 import java.util.HashSet;
 
 /**
@@ -171,4 +172,10 @@ public Descriptor getDescriptor() {
   public void setInListValues(HiveDecimal[] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
index 05dcb43..0252236 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
@@ -177,4 +177,10 @@ public Descriptor getDescriptor() {
   public void setInListValues(double [] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
+
 }
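The IN-list variants above print their constant lists with java.util.Arrays.toString, which is why these hunks add that import. The resulting format is plain JDK behavior, shown standalone below (the demo class name is hypothetical, not part of the patch):

import java.util.Arrays;

public class InListParamsDemo {
  public static void main(String[] args) {
    long[] inListValues = {1L, 5L, 7L};
    int colNum = 2;
    // Mirrors what the new IN-list vectorExpressionParameters() methods return.
    System.out.println("col " + colNum + ", values " + Arrays.toString(inListValues));
    // Prints: col 2, values [1, 5, 7]
  }
}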
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
index 41e3b0f..175b497 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -51,6 +53,12 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    // The children are input.
+    return null;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 
     // IMPORTANT NOTE: For Multi-AND, the VectorizationContext class will catch cases with 3 or
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
index dc5139d..5ed1ed8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -232,6 +232,12 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    // The children are input.
+    return null;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 
     // IMPORTANT NOTE: For Multi-OR, the VectorizationContext class will catch cases with 3 or
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
index a40f39c..dce1b43 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
@@ -177,4 +177,11 @@ public Descriptor getDescriptor() {
   public void setInListValues(long [] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
+
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java
index bcc8f89..7092f4b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarAndColumn.java
@@ -73,6 +73,11 @@ public void setValue(long value) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java
index f515e60..ab242ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterScalarOrColumn.java
@@ -73,6 +73,11 @@ public void setValue(long value) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "val " + value + ", col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
index e34ec75..200735c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -179,4 +181,9 @@ public Descriptor getDescriptor() {
   public void setInListValues(byte [][] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
index 70b393c..f65bd97 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -176,4 +177,12 @@ public void setStructColumnExprs(VectorizationContext vContext,
     }
     this.fieldVectorColumnTypes = fieldVectorColumnTypes;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "structExpressions " + Arrays.toString(structExpressions) +
+        ", fieldVectorColumnTypes " + Arrays.toString(fieldVectorColumnTypes) +
+        ", structColumnMap " + Arrays.toString(structColumnMap);
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java
index 25a276a..a7666bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.sql.Timestamp;
+import java.util.Arrays;
 import java.util.HashSet;
 
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -169,4 +170,10 @@ public Descriptor getDescriptor() {
   public void setInListValues(Timestamp[] a) {
     this.inListValues = a;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol + ", values " + Arrays.toString(inListValues);
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
index cc6afa5..76fdeb5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
@@ -134,6 +134,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
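FilterExprAndExpr and FilterExprOrExpr above return null because AND/OR operate on whole child expressions rather than on columns or scalars, so a renderer has to descend into the children instead. A hypothetical sketch, assuming the existing getChildExpressions() accessor on VectorExpression and the describe() helper sketched earlier (neither line of rendering code is part of the patch):

// Hypothetical recursive rendering of an expression tree.
static String render(VectorExpression ve) {
  StringBuilder sb = new StringBuilder(describe(ve));
  VectorExpression[] children = ve.getChildExpressions();
  if (children != null) {
    sb.append(" <- ");
    for (int i = 0; i < children.length; i++) {
      if (i > 0) {
        sb.append(", ");
      }
      // Each child prints its own "col ..."/"val ..." parameters.
      sb.append(render(children[i]));
    }
  }
  return sb.toString();
}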
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
index 4691fe1..9fe3010 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
@@ -122,6 +122,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
index 561c152..569d7f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
@@ -122,6 +122,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
index 0120c0a..1b3127c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
@@ -130,6 +130,11 @@ public void setInputColumn(int inputColumn) {
     this.inputColumn = inputColumn;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
index b73e851..b527482 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
@@ -130,6 +130,11 @@ public void setInputColumn(int inputColumn) {
     this.inputColumn = inputColumn;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
index fa0a746..db45ed4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
@@ -143,6 +143,10 @@ public String getOutputType() {
     return "String";
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol;
+  }
 
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
index a18bb55..9eead7b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
@@ -120,6 +120,11 @@ public String getOutputType() {
     return outputType;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", decimalPlaces " + decimalPlaces;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
index 774551c..5f4e83a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
@@ -131,6 +131,11 @@ public void setInputColumn(int inputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
index b84d9be..b652226 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
@@ -132,6 +132,11 @@ public void setInputColumn(int inputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputColumn;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java
index 402d0f8..2385a40 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java
@@ -81,6 +81,11 @@ public void setType(String type) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder()).build();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
index f0f4f6d..514b453 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -167,6 +167,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
index 804923e..98fa29e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
@@ -136,6 +136,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
index 8face7d..9dc3669 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
@@ -130,6 +130,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", val " + arg3Scalar.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
index 40f2e08..4d4649f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
@@ -132,6 +132,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
index 43676dd..c8f3294 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
@@ -120,6 +120,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", val " + arg3Scalar;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 06ba8f8..4c6015e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -166,6 +166,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index ca11a55..c8367c6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -177,6 +177,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
index 4e09448..8b18ae0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -164,6 +166,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", val " + new String(arg3Scalar, StandardCharsets.UTF_8);
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
index 79ed71e..3a0c035 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -164,6 +166,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + new String(arg2Scalar, StandardCharsets.UTF_8) + ", col " + arg3Column;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
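String scalars are carried as byte[] inside these expressions, so the IfExpr*String* methods above decode them with an explicit UTF-8 charset (hence the StandardCharsets imports) instead of the platform-default new String(byte[]). A standalone illustration; the demo class name is hypothetical:

import java.nio.charset.StandardCharsets;

public class ScalarBytesDemo {
  public static void main(String[] args) {
    byte[] arg2Scalar = "müller".getBytes(StandardCharsets.UTF_8);
    // new String(arg2Scalar) alone would depend on file.encoding;
    // decoding with an explicit charset is deterministic on every VM.
    System.out.println("val " + new String(arg2Scalar, StandardCharsets.UTF_8));
    // Prints: val müller
  }
}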
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
index 2a35970..4a51693 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -133,6 +135,11 @@ public String getOutputType() {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + new String(arg2Scalar, StandardCharsets.UTF_8) + ", val " + new String(arg3Scalar, StandardCharsets.UTF_8);
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
index 8441863..8219b3c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
@@ -133,4 +133,9 @@ public int getOutputColumn() {
   public String getOutputType() {
     return "long";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", col " + arg3Column;
+  }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
index 6b87ff2..eb0c1c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
@@ -130,4 +130,10 @@ public int getOutputColumn() {
   public String getOutputType() {
     return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", val " + arg3Scalar;
+  }
+
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index 2162f17..3e4a195 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -131,4 +131,10 @@ public int getOutputColumn() {
   public String getOutputType() {
     return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", col " + arg3Column;
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 707f574..5273131 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -117,4 +117,10 @@ public int getOutputColumn() {
   public String getOutputType() {
     return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", val " + arg2Scalar + ", val " + arg3Scalar;
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index f19551e..2f6e7b9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -107,6 +107,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 3169bae..583ab7a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -105,6 +105,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index 33f50e0..6fa9779 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -174,6 +174,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
index 68b6a87..f26c8e1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
@@ -141,6 +141,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
index a77d41a..3b3c923 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
@@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
index be717a1..c174d5f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
@@ -135,6 +135,11 @@ public void setOutputColumn(int outputColumn) {
     this.outputColumn = outputColumn;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
   @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
index 6ee5daf..dd2c3dc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
@@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
index cd8d723..710ac23 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
@@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum + ", val " + value;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
index 053ced9..c8e07f2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
@@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+    return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 16148f3..a234ae1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java index 25d52b3..8db8b86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 927856f..b06a876 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java index e6e54e9..b44e9bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index ac10a83..ada4312 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public 
VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java index 865fdb9..fa667ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java @@ -154,6 +154,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 789ca3e..7d16ae0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + value; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index fb15880..babac22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -154,6 +156,11 @@ public void setInListValues(long [] a) { this.inListValues = a; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", values " + Arrays.toString(inListValues); + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java index 3c442da..b1958f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java @@ -153,6 +153,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 09d845c..a4cea31 100644 ---
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -135,6 +135,11 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index afc80eb..15ba69b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index e2e871d..38984c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index ae675ed..47fb591 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -135,6 +135,11 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index c664e35..d5801d7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index 776ab9f..b6bbfd1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -136,6 +136,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + value + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index fbca683..80b79a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -135,6 +135,11 @@ public String getOutputType() { } @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java index f95c9ec..b8e3489 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java @@ -34,7 +34,7 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; + protected int colNum; private int outputColumn; // Subclasses must override this with a function that implements the desired logic. 
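For orientation, a minimal sketch of what this visibility change enables (the subclass name and the hook method details here are assumptions for illustration; the real subclasses are generated from templates): with colNum widened from private to protected, MathFuncDoubleToDouble itself can render the "col <n>" parameter string for every math function, and a subclass only supplies the scalar math.

    // Hypothetical subclass sketch, assuming the abstract hook referred to by the
    // comment above is a double-to-double method named func. The subclass adds no
    // display logic of its own: vectorExpressionParameters() in the base class reads
    // the now protected colNum to report which batch column feeds the function.
    public class FuncHalfDoubleToDouble extends MathFuncDoubleToDouble {
      @Override
      public double func(double d) {
        return d / 2.0;
      }
    }

With the VectorExpression.toString() rework later in this patch, such an expression would display along the lines of FuncHalfDoubleToDouble(col 2) -> 5:double.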
@@ -133,4 +133,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "double"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java index 4b4f38d..3b55d06 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java @@ -133,4 +133,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "double"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java index 4b1f908..5e36c09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java @@ -33,7 +33,7 @@ public abstract class MathFuncLongToLong extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum; + protected int colNum; private int outputColumn; // Subclasses must override this with a function that implements the desired logic. @@ -125,4 +125,9 @@ public void setColNum(int colNum) { public String getOutputType() { return "long"; } + + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index ea2a434..1ece4a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -122,6 +122,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java index 39a3d87..0990095 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java @@ -55,6 +55,11 @@ public double getDivisor() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", divisor " + divisor; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java index 12b7286..4809011 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java @@ -55,6 +55,11 @@ public long getDivisor() { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", divisor " + 
divisor; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java index b8dfb41..4b791b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundWithNumDigitsDoubleToDouble.java @@ -59,6 +59,11 @@ public void setArg(long l) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", decimalPlaces " + decimalPlaces.get(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java index 77749e9..a906bef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java @@ -138,6 +138,11 @@ public void setColNum1(int colNum1) { this.colNum1 = colNum1; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum1; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java index 733e2a6..f8517dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java @@ -108,6 +108,11 @@ public void setColNum(int colNum) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java index 7159178..b792bbe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java @@ -106,6 +106,11 @@ public void setColNum(int colNum) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java index f387a5c..b58b49e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java @@ -138,6 +138,11 @@ public void setColNum1(int colNum1) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.FILTER) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java index b914196..cb3870e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java @@ -162,8 +162,13 @@ public String getOutputType() { return "String_Family"; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + @Override public Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index b90e3c0..b1ceb9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -175,4 +175,9 @@ public Descriptor getDescriptor() { public void setInListValues(byte [][] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + java.util.Arrays.deepToString(inListValues); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index 1cd3c46..bd44390 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -149,6 +151,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(value, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index 56bc97b..35666d8 100644 ---
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -440,6 +440,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 76602be..cdaf694 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -156,6 +156,11 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index b98f72d..b1e1dad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -149,6 +151,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 75a99f0..305d1a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -245,6 +245,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", start " + startIdx; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 0ff7af6..4a7dbdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -272,6 
+272,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", start " + startIdx + ", length " + length; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 016a695..527d3b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -195,6 +195,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 89ef251..c87371f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -136,6 +136,11 @@ public String getOutputType() { return "String"; } + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java index 769c70a..4d06ad1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; @@ -172,4 +173,11 @@ public void setStructColumnExprs(VectorizationContext vContext, } this.fieldVectorColumnTypes = fieldVectorColumnTypes; } + + @Override + public String vectorExpressionParameters() { + return "structExpressions " + Arrays.toString(structExpressions) + + ", fieldVectorColumnTypes " + Arrays.toString(fieldVectorColumnTypes) + + ", structColumnMap " + Arrays.toString(structColumnMap); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java index bc09a3a..5e76de8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import java.util.HashSet; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -150,4 +151,9 @@ public Descriptor 
getDescriptor() { public void setInListValues(Timestamp[] a) { this.inListValues = a; } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol + ", values " + Arrays.toString(inListValues); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 052d57c..32cf527 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -133,6 +133,11 @@ public String getOutputType() { return "String"; } + @Override + public String vectorExpressionParameters() { + return "col " + inputColumn; + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index 543d7f0..c0870c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -141,6 +141,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "columns " + Arrays.toString(inputColumns); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // Descriptor is not defined because it takes variable number of arguments with different diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 329d381..5e0e7aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -129,6 +131,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "columns " + Arrays.toString(inputColumns); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { // Descriptor is not defined because it takes variable number of arguments with different // data types. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 8fca8a1..bf2d4ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -22,6 +22,7 @@ import java.util.Map; import com.google.common.collect.ImmutableMap; + import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -138,25 +139,42 @@ public void setInputTypes(Type ... 
inputTypes) { return inputTypes; } + public String vectorExpressionParameters() { + return null; + } + @Override public String toString() { StringBuilder b = new StringBuilder(); - b.append(this.getClass().getSimpleName()); - b.append("["); - b.append(this.getOutputColumn()); - b.append(":"); - b.append(this.getOutputType()); - b.append("]"); - if (childExpressions != null) { - b.append("("); - for (int i = 0; i < childExpressions.length; i++) { - b.append(childExpressions[i].toString()); - if (i < childExpressions.length-1) { - b.append(" "); + if (this instanceof IdentityExpression) { + b.append(vectorExpressionParameters()); + } else { + b.append(this.getClass().getSimpleName()); + String vectorExpressionParameters = vectorExpressionParameters(); + if (vectorExpressionParameters != null) { + b.append("("); + b.append(vectorExpressionParameters); + b.append(")"); + } + if (childExpressions != null) { + b.append("(children: "); + for (int i = 0; i < childExpressions.length; i++) { + b.append(childExpressions[i].toString()); + if (i < childExpressions.length-1) { + b.append(", "); + } } + b.append(")"); + } + b.append(" -> "); + int outputColumn = getOutputColumn(); + if (outputColumn != -1) { + b.append(outputColumn); + b.append(":"); } - b.append(")"); + b.append(getOutputType()); } + return b.toString(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index 4ce6e20..00e9e03 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -216,6 +216,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index 0e09f49..730dc36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -261,6 +261,11 @@ public void setNumDay(int numDays) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + numDays; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 724ea45..9787ade 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -205,6 +205,11 @@ public void setPositive(boolean isPositive) { this.isPositive = isPositive; } + @Override + public String vectorExpressionParameters() { + return 
"val " + stringValue + ", col " + colNum; + } + public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 4edf558..d3c5da2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -358,6 +358,11 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum1 + ", col " + colNum2; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 71b3887..7e8c19b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.text.ParseException; @@ -297,6 +298,11 @@ public void setStringValue(byte[] stringValue) { } @Override + public String vectorExpressionParameters() { + return "col " + colNum + ", val " + new String(stringValue, StandardCharsets.UTF_8); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index c733bc9..9f2d476 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -296,6 +296,11 @@ public void setStringValue(byte[] stringValue) { } @Override + public String vectorExpressionParameters() { + return "val " + stringValue + ", col " + colNum; + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 0a3a87a..0255cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import 
org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.DateUtils; import com.google.common.base.Preconditions; @@ -150,6 +151,15 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + if (field == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", field " + DateUtils.getFieldName(field); + } + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index 45e7a31..6719ce3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -177,6 +177,15 @@ public void setOutputColumn(int outputColumn) { } @Override + public String vectorExpressionParameters() { + if (fieldStart == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", fieldStart " + fieldStart + ", fieldLength " + fieldLength; + } + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 5fca678..e9000c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hive.common.util.DateUtils; import com.google.common.base.Preconditions; @@ -148,6 +149,15 @@ public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } + @Override + public String vectorExpressionParameters() { + if (field == -1) { + return "col " + colNum; + } else { + return "col " + colNum + ", field " + DateUtils.getFieldName(field); + } + } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java index 96e62cf..0866f63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import 
org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -55,7 +56,25 @@ public abstract void aggregateInputSelection(VectorAggregationBufferRow[] aggreg public boolean hasVariableSize() { return false; } + public abstract VectorExpression inputExpression(); public abstract void init(AggregationDesc desc) throws HiveException; + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(this.getClass().getSimpleName()); + VectorExpression inputExpression = inputExpression(); + if (inputExpression != null) { + sb.append("("); + sb.append(inputExpression.toString()); + sb.append(") -> "); + } else { + sb.append("(*) -> "); + } + ObjectInspector outputObjectInspector = getOutputObjectInspector(); + sb.append(outputObjectInspector.getTypeName()); + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java index d0ff5fa..05b76c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java @@ -114,6 +114,12 @@ public void reset() { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private HiveDecimalWritable resultSum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java index d0a1d0d..483d9dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java @@ -84,6 +84,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private Object[] partialResult; transient private LongWritable resultCount; transient private DoubleWritable resultSum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java index cf373a1..494febc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java @@ -60,6 +60,12 @@ public void reset() { } private VectorExpression inputExpression = null; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final LongWritable result; public VectorUDAFCount(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java index 577977f..dec88cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -61,6 +61,12 @@ public void reset() { } private VectorExpression inputExpression = null; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final LongWritable result; public VectorUDAFCountMerge(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java index 72beda8..337ba0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java @@ -57,6 +57,13 @@ public void reset() { } } + + @Override + public VectorExpression inputExpression() { + // None. + return null; + } + transient private final LongWritable result; public VectorUDAFCountStar(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java index fa25e6a..8cd3506 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java index b3e1fae..61d6977 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java index 3a5fef6..8a71ce3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java @@ -88,6 +88,12 @@ public void reset() { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private final HiveDecimalWritable scratchDecimal; public VectorUDAFSumDecimal(VectorExpression inputExpression) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java index 970ec22..2709b07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java index 9af1a28..03dce1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java @@ -83,6 +83,12 @@ public void reset () { } private VectorExpression inputExpression; + + @Override + public VectorExpression inputExpression() { + return inputExpression; + } + transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c288731..77b44fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; + import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -63,6 +65,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import com.google.common.base.Preconditions; + /** * This class is common operator class for native vectorized map join. * @@ -72,7 +76,43 @@ */ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implements VectorizationContextRegion { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinCommonOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected abstract String getLoggingPrefix(); + + // For debug tracing: information about the map or reduce task, operator, operator class, etc. 
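// As an illustration (the task name and operator id here are assumed values, chosen
// only to show the shape of the prefix): for a map task named "Map 1" and an operator
// id of "MAPJOIN_25", initLoggingPrefix() below would build a prefix such as
// "VectorMapJoinInnerLongOperator Map 1 MAPJOIN_25", so each trace line identifies
// the task, the operator instance, and the operator class.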
+ protected transient String loggingPrefix; + + protected String getLoggingPrefix(String className) { + if (loggingPrefix == null) { + initLoggingPrefix(className); + } + return loggingPrefix; + } + + protected void initLoggingPrefix(String className) { + if (hconf == null) { + // Constructor time... + loggingPrefix = className; + } else { + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + loggingPrefix = className + " " + work.getName() + " " + getOperatorId(); + } + } + + //------------------------------------------------------------------------------------------------ + + protected VectorMapJoinDesc vectorDesc; + + protected VectorMapJoinInfo vectorMapJoinInfo; // Whether this operator is an outer join. protected boolean isOuterJoin; @@ -88,10 +128,10 @@ // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; - // The output column projection of the vectorized row batch. And, the type names of the output + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; - protected String[] outputTypeNames; + protected TypeInfo[] outputTypeInfos; // These are the vectorized batch expressions for filtering, key expressions, and value // expressions. @@ -101,15 +141,17 @@ // This is map of which vectorized row batch columns are the big table key columns. Since // we may have key expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableKeyColumnMap; - protected ArrayList<String> bigTableKeyTypeNames; + protected String[] bigTableKeyColumnNames; + protected TypeInfo[] bigTableKeyTypeInfos; // Similarly, this is map of which vectorized row batch columns are the big table value columns. // Since we may have value expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableValueColumnMap; - protected ArrayList<String> bigTableValueTypeNames; + protected String[] bigTableValueColumnNames; + protected TypeInfo[] bigTableValueTypeInfos; // This is a mapping of which big table columns (input and key/value expressions) will be // part of the big table portion of the join output result. @@ -124,6 +166,8 @@ // to output batch scratch columns for the small table portion. protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping projectionMapping; + // These are the output columns for the small table and the outer small table keys. protected int[] smallTableOutputVectorColumns; protected int[] bigTableOuterKeyOutputVectorColumns; @@ -137,9 +181,6 @@ // transient. //--------------------------------------------------------------------------- - // For debug tracing: the name of the map or reduce task. - protected transient String taskName; - // The threshold where we should use a repeating vectorized row batch optimization for // generating join output results. 
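// For example (an illustrative reading of this threshold, not a statement of the exact
// policy): when a single big-table key matches a very large number of small-table rows,
// the big-table side of each result row repeats; beyond the threshold the generated
// output batches can mark those columns isRepeating and set the value once instead of
// copying it row by row.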
protected transient boolean useOverflowRepeatedThreshold; @@ -192,6 +233,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, MapJoinDesc desc = (MapJoinDesc) conf; this.conf = desc; + vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); + vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); + Preconditions.checkState(vectorMapJoinInfo != null); this.vContext = vContext; @@ -210,214 +254,28 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); - List keyDesc = desc.getKeys().get(posBigTable); - bigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); - - // Since a key expression can be a calculation and the key will go into a scratch column, - // we need the mapping and type information. - bigTableKeyColumnMap = new int[bigTableKeyExpressions.length]; - bigTableKeyTypeNames = new ArrayList(); - boolean onlyColumns = true; - for (int i = 0; i < bigTableKeyColumnMap.length; i++) { - VectorExpression ve = bigTableKeyExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableKeyTypeNames.add(keyDesc.get(i).getTypeString()); - bigTableKeyColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableKeyExpressions = null; - } - - List bigTableExprs = desc.getExprs().get(posBigTable); - bigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); - - /* - * Similarly, we need a mapping since a value expression can be a calculation and the value - * will go into a scratch column. - */ - bigTableValueColumnMap = new int[bigTableValueExpressions.length]; - bigTableValueTypeNames = new ArrayList(); - onlyColumns = true; - for (int i = 0; i < bigTableValueColumnMap.length; i++) { - VectorExpression ve = bigTableValueExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableValueTypeNames.add(bigTableExprs.get(i).getTypeString()); - bigTableValueColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableValueExpressions = null; - } - - determineCommonInfo(isOuterJoin); - } - - protected void determineCommonInfo(boolean isOuter) throws HiveException { - - bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping"); - - bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping"); - - // The order of the fields in the LazyBinary small table value must be used, so - // we use the source ordering flavor for the mapping. - smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping"); - - // We use a mapping object here so we can build the projection in any order and - // get the ordered by 0 to n-1 output columns at the end. - // - // Also, to avoid copying a big table key into the small table result area for inner joins, - // we reference it with the projection so there can be duplicate output columns - // in the projection. - VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); - - /* - * Gather up big and small table output result information from the MapJoinDesc. 
- */ - List bigTableRetainList = conf.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); - - int[] smallTableIndices; - int smallTableIndicesSize; - List smallTableExprs = conf.getExprs().get(posSingleVectorMapJoinSmallTable); - if (conf.getValueIndices() != null && conf.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { - smallTableIndices = conf.getValueIndices().get(posSingleVectorMapJoinSmallTable); - smallTableIndicesSize = smallTableIndices.length; - } else { - smallTableIndices = null; - smallTableIndicesSize = 0; - } - - List smallTableRetainList = conf.getRetainList().get(posSingleVectorMapJoinSmallTable); - int smallTableRetainSize = smallTableRetainList.size(); - - int smallTableResultSize = 0; - if (smallTableIndicesSize > 0) { - smallTableResultSize = smallTableIndicesSize; - } else if (smallTableRetainSize > 0) { - smallTableResultSize = smallTableRetainSize; - } - - /* - * Determine the big table retained mapping first so we can optimize out (with - * projection) copying inner join big table keys in the subsequent small table results section. - */ - int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); - for (int i = 0; i < bigTableRetainSize; i++) { - - // Since bigTableValueExpressions may do a calculation and produce a scratch column, we - // need to map to the right batch column. - - int retainColumn = bigTableRetainList.get(i); - int batchColumnIndex = bigTableValueColumnMap[retainColumn]; - String typeName = bigTableValueTypeNames.get(i); - - // With this map we project the big table batch to make it look like an output batch. - projectionMapping.add(nextOutputColumn, batchColumnIndex, typeName); - - // Collect columns we copy from the big table batch to the overflow batch. - if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { - // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeName); - } - - nextOutputColumn++; - } - - /* - * Now determine the small table results. - */ - int firstSmallTableOutputColumn; - firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0); - int smallTableOutputCount = 0; - nextOutputColumn = firstSmallTableOutputColumn; - - // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - if (smallTableIndicesSize > 0) { - smallTableOutputCount = smallTableIndicesSize; - - for (int i = 0; i < smallTableIndicesSize; i++) { - if (smallTableIndices[i] >= 0) { - - // Zero and above numbers indicate a big table key is needed for - // small table result "area". - - int keyIndex = smallTableIndices[i]; - - // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we - // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - String typeName = bigTableKeyTypeNames.get(keyIndex); - - if (!isOuter) { - - // Optimize inner join keys of small table results. - - // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeName); - - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". 
- bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeName); - } - } else { - - // For outer joins, since the small table key can be null when there is no match, - // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. - - int scratchColumn = vOutContext.allocateScratchColumn(typeName); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); - - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeName); + bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); + bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos(); + bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions(); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeName); - } - } else { + bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames(); + bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); + bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions(); - // Negative numbers indicate a column to be (deserialize) read from the small table's - // LazyBinary value row. - int smallTableValueIndex = -smallTableIndices[i] - 1; + bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); - String typeName = smallTableExprs.get(i).getTypeString(); + bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); - // Make a new big table scratch column for the small table value. - int scratchColumn = vOutContext.allocateScratchColumn(typeName); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); + smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - } - nextOutputColumn++; - } - } else if (smallTableRetainSize > 0) { - smallTableOutputCount = smallTableRetainSize; - - // Only small table values appear in join output result. - - for (int i = 0; i < smallTableRetainSize; i++) { - int smallTableValueIndex = smallTableRetainList.get(i); - - // Make a new big table scratch column for the small table value. - String typeName = smallTableExprs.get(i).getTypeString(); - int scratchColumn = vOutContext.allocateScratchColumn(typeName); + projectionMapping = vectorMapJoinInfo.getProjectionMapping(); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); - - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - nextOutputColumn++; - } - } - - // Convert dynamic arrays and maps to simple arrays. - - bigTableRetainedMapping.finalize(); - - bigTableOuterKeyMapping.finalize(); + determineCommonInfo(isOuterJoin); + } - smallTableMapping.finalize(); + protected void determineCommonInfo(boolean isOuter) throws HiveException { bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); @@ -429,46 +287,37 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); - projectionMapping.finalize(); - - // Verify we added an entry for each output. 
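With this change the constructor is reduced to copying precomputed results out of VectorMapJoinInfo: the Vectorizer now does the key/value mapping work at planning time and attaches it to the VectorMapJoinDesc. A reduced sketch of the holder side of that handoff, trimmed to two fields (the field and setter names are modeled on the getters used above, but are an assumption, not verified against the real VectorMapJoinInfo):

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

    // Plan-time producer / run-time consumer holder, reduced to two fields.
    public class MapJoinInfoSketch {
      private int[] bigTableKeyColumnMap;
      private TypeInfo[] bigTableKeyTypeInfos;

      public void setBigTableKeyColumnMap(int[] bigTableKeyColumnMap) {
        this.bigTableKeyColumnMap = bigTableKeyColumnMap;
      }
      public int[] getBigTableKeyColumnMap() {
        return bigTableKeyColumnMap;
      }
      public void setBigTableKeyTypeInfos(TypeInfo[] bigTableKeyTypeInfos) {
        this.bigTableKeyTypeInfos = bigTableKeyTypeInfos;
      }
      public TypeInfo[] getBigTableKeyTypeInfos() {
        return bigTableKeyTypeInfos;
      }
    }

At plan time the Vectorizer fills such a holder and stores it on the descriptor; at run time the constructor simply reads it back, and the Preconditions.checkState call above guards against a plan that was marked vectorized without the info ever being populated.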
-    assert projectionMapping.isSourceSequenceGood();
-
     outputProjection = projectionMapping.getOutputColumns();
-    outputTypeNames = projectionMapping.getTypeNames();
+    outputTypeInfos = projectionMapping.getTypeInfos();
 
     if (isLogDebugEnabled) {
       int[] orderDisplayable = new int[order.length];
       for (int i = 0; i < order.length; i++) {
         orderDisplayable[i] = (int) order[i];
       }
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable));
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable);
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable);
-
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap));
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeNames " + bigTableKeyTypeNames);
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable));
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable);
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable);
 
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap));
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + bigTableValueTypeNames);
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap));
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames));
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos));
 
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableIndices " + Arrays.toString(smallTableIndices));
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableRetainList " + smallTableRetainList);
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap));
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames));
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeInfos " + Arrays.toString(bigTableValueTypeInfos));
 
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor firstSmallTableOutputColumn " + firstSmallTableOutputColumn);
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableOutputCount " + smallTableOutputCount);
+      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString());
 
-      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString());
+      LOG.debug(getLoggingPrefix() + " 
VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -482,10 +331,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { ArrayList list = new ArrayList(); int count = mapping.getCount(); int[] outputColumns = mapping.getOutputColumns(); - String[] typeNames = mapping.getTypeNames(); + TypeInfo[] typeInfos = mapping.getTypeInfos(); for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; - String typeName = typeNames[i]; + String typeName = typeInfos[i].getTypeName(); if (VectorizationContext.isStringFamily(typeName)) { list.add(outputColumn); } @@ -500,10 +349,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { */ protected void setupVOutContext(List outputColumnNames) { if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { - throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch"); + throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); } vOutContext.resetProjectionColumns(); for (int i = 0; i < outputColumnNames.size(); ++i) { @@ -512,7 +361,7 @@ protected void setupVOutContext(List outputColumnNames) { vOutContext.addProjectionColumn(columnName, outputColumn); if (isLogDebugEnabled) { - LOG.debug(taskName + ", 
" + getOperatorId() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); } } } @@ -522,7 +371,7 @@ protected void setupVOutContext(List outputColumnNames) { */ @Override protected HashTableLoader getHashTableLoader(Configuration hconf) { - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableLoader hashTableLoader; switch (vectorDesc.hashTableImplementationType()) { @@ -546,15 +395,6 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - if (isLogDebugEnabled) { - // Determine the name of our map or reduce task for debug tracing. - BaseWork work = Utilities.getMapWork(hconf); - if (work == null) { - work = Utilities.getReduceWork(hconf); - } - taskName = work.getName(); - } - /* * Get configuration parameters. */ @@ -570,9 +410,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { smallTableVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - VectorizedBatchUtil.typeInfosFromTypeNames( - smallTableMapping.getTypeNames()), - /* useExternalBuffer */ true)); + smallTableMapping.getTypeInfos(), + /* useExternalBuffer */ true)); smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); } @@ -596,13 +435,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (isLogDebugEnabled) { int[] currentScratchColumns = vOutContext.currentScratchColumns(); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); StructObjectInspector structOutputObjectInspector = (StructObjectInspector) outputObjInspector; List fields = structOutputObjectInspector.getAllStructFieldRefs(); int i = 0; for (StructField field : fields) { - LOG.debug("VectorMapJoinInnerBigOnlyCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); i++; } } @@ -613,7 +452,7 @@ protected void completeInitializationOp(Object[] os) throws HiveException { // setup mapJoinTables and serdes super.completeInitializationOp(os); - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); switch (vectorDesc.hashTableImplementationType()) { case OPTIMIZED: @@ -655,7 +494,7 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { // First, just allocate just the projection columns we will be using. 
for (int i = 0; i < outputProjection.length; i++) { int outputColumn = outputProjection[i]; - String typeName = outputTypeNames[i]; + String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } @@ -687,7 +526,7 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo); if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); } } } @@ -724,9 +563,9 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { } protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { - LOG.debug("commonSetup " + batchName + " column count " + batch.numCols); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { - LOG.debug("commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 0bba141..43f3951 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 621804b..95fb0c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import 
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an inner join on Multi-Key * and only big table columns appear in the join result so a hash multi-set is used. @@ -48,8 +50,17 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -114,7 +125,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index 10e75ab..044e3e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 804d69c..c85e1d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -44,8 +44,17 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static 
final String CLASS_NAME = VectorMapJoinInnerLongOperator.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected String getLoggingPrefix() {
+    return super.getLoggingPrefix(CLASS_NAME);
+  }
+
+  //------------------------------------------------------------------------------------------------
 
   // (none)
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
index fcfa0bd..a108cd0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
@@ -39,6 +39,8 @@
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 
+import com.google.common.base.Preconditions;
+
 /*
  * Specialized class for doing a vectorized map join that is an inner join on a Multi-Key
  * using a hash map.
@@ -46,8 +48,17 @@
 public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenerateResultOperator {
   private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerMultiKeyOperator.class.getName());
+
+  //------------------------------------------------------------------------------------------------
+  private static final String CLASS_NAME = VectorMapJoinInnerMultiKeyOperator.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected String getLoggingPrefix() {
+    return super.getLoggingPrefix(CLASS_NAME);
+  }
+
+  //------------------------------------------------------------------------------------------------
 
   // (none)
 
@@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException {
       keyVectorSerializeWrite = new VectorSerializeRow(
           new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
-      keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap);
+      keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
       currentKeyOutput = new Output();
       saveKeyOutput = new Output();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index 0f9baae..3211d7d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -45,8 +45,17 @@
 public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerateResultOperator {
   private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerStringOperator.class.getName());
+
+  //------------------------------------------------------------------------------------------------
+  private static final String CLASS_NAME = VectorMapJoinInnerStringOperator.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected String getLoggingPrefix() {
+    return super.getLoggingPrefix(CLASS_NAME);
+  }
+
+  //------------------------------------------------------------------------------------------------
 
   // (none)
 
diff --git
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 1149a9d..b02e6fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index e0baebc..36b8f3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an left semi join on Multi-Key * using hash set. 
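The per-key-type specializations that follow all share one setup step worth calling out: the Multi-Key variants serialize the batch's key columns into a binary-sortable byte form, so a multi-column key can be hashed and compared as a single byte sequence. Condensed from the process() hunks of the Multi-Key operators in this patch (field declarations elided):

    // Shared Multi-Key setup, as it appears (modulo formatting) in the
    // Inner, InnerBigOnly, LeftSemi, and Outer Multi-Key operators.
    keyVectorSerializeWrite = new VectorSerializeRow(
        new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
    keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
    currentKeyOutput = new Output();
    saveKeyOutput = new Output();

Passing bigTableKeyTypeInfos rather than the old bigTableKeyTypeNames is exactly the init() signature change these hunks make.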
@@ -47,8 +49,17 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -113,7 +124,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 49e1177..0b3de0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 58bd0ab..72309e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -45,8 +45,17 @@ */ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // 
(none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 7f9afd2..a4fc7d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an outer join on Multi-Key * using a hash map. @@ -47,8 +49,17 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 8ed1ed4..6e7e5cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 9f3b107..069cc9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -94,7 +94,7 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private VectorMapJoinFastHashTable createHashTable(int newThreshold) { boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f34b1cd..111a6d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -40,7 +40,7 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, ReusableGetAdaptor hashMapRowGetter = mapJoinTableContainer.createGetter(refKey); boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); boolean minMaxEnabled = vectorDesc.minMaxEnabled(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java index 8133aef..42ca4b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java @@ -160,7 +160,7 @@ public VectorReduceSinkCommonOperator(CompilationOpContext ctx, ReduceSinkDesc desc = (ReduceSinkDesc) conf; this.conf = desc; - vectorDesc = desc.getVectorDesc(); + vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc(); vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo(); this.vContext = vContext; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 20cfb89..6806ab4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -374,6 +374,11 @@ public void setExpr(ExprNodeGenericFuncDesc expr) { } @Override + public String vectorExpressionParameters() { + return expr.getExprString(); + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()).build(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 3a179a3..6167f48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; import 
java.io.Serializable; +import java.lang.annotation.Annotation; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -33,6 +34,7 @@ import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; +import org.apache.commons.lang.ArrayUtils; import org.apache.calcite.util.Pair; import org.apache.commons.lang.ArrayUtils; @@ -43,6 +45,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -62,7 +66,11 @@ import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; @@ -73,6 +81,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -91,18 +100,36 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc; +import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import 
org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; +import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorLimitDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; +import org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; @@ -117,10 +144,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; @@ -170,6 +200,9 @@ import org.apache.hadoop.hive.serde2.NullStructSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -182,6 +215,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hive.common.util.ReflectionUtil; import com.google.common.base.Preconditions; @@ -234,12 +270,39 @@ private boolean isSpark; - boolean useVectorizedInputFileFormat; - boolean useVectorDeserialize; - boolean useRowDeserialize; + private boolean useVectorizedInputFileFormat; + private boolean useVectorDeserialize; + private boolean useRowDeserialize; + private boolean isReduceVectorizationEnabled; boolean isSchemaEvolution; + private BaseWork currentBaseWork; + private Operator currentOperator; + + public void testSetCurrentBaseWork(BaseWork testBaseWork) { + currentBaseWork = testBaseWork; + } + + private void setNodeIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createNodeIssue(issue)); + } + + private void setOperatorIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createOperatorIssue(currentOperator, issue)); + } + + private void setExpressionIssue(String expressionTitle, String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createExpressionIssue(currentOperator, expressionTitle, issue)); + } + + private void clearNotVectorizedReason() { + 
currentBaseWork.setNotVectorizedReason(null); + } + public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPPlus.class); @@ -369,6 +432,10 @@ public Vectorizer() { int partitionColumnCount; boolean useVectorizedInputFileFormat; + boolean groupByVectorOutput; + boolean allNative; + boolean usesVectorUDFAdaptor; + String[] scratchTypeNameArray; Set> nonVectorizedOps; @@ -379,6 +446,12 @@ public Vectorizer() { partitionColumnCount = 0; } + public void assume() { + groupByVectorOutput = true; + allNative = true; + usesVectorUDFAdaptor = false; + } + public void setAllColumnNames(List allColumnNames) { this.allColumnNames = allColumnNames; } @@ -394,9 +467,19 @@ public void setPartitionColumnCount(int partitionColumnCount) { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + public void setAllNative(boolean allNative) { + this.allNative = allNative; + } + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; + } public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; } + public void setNonVectorizedOps(Set> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -428,7 +511,14 @@ public void transferToBaseWork(BaseWork baseWork) { scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); - baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + if (baseWork instanceof MapWork) { + MapWork mapWork = (MapWork) baseWork; + mapWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + } + + baseWork.setAllNative(allNative); + baseWork.setGroupByVectorOutput(groupByVectorOutput); + baseWork.setUsesVectorUDFAdaptor(usesVectorUDFAdaptor); } } @@ -445,17 +535,29 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; if (currTask instanceof MapRedTask) { - convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false); + MapredWork mapredWork = ((MapRedTask) currTask).getWork(); + convertMapWork(mapredWork.getMapWork(), false); + ReduceWork reduceWork = mapredWork.getReduceWork(); + if (reduceWork != null) { + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We do not vectorize MR Reduce. + } } else if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork w: work.getAllWork()) { - if (w instanceof MapWork) { - convertMapWork((MapWork) w, true); - } else if (w instanceof ReduceWork) { - // We are only vectorizing Reduce under Tez. - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) w, true); + for (BaseWork baseWork: work.getAllWork()) { + if (baseWork instanceof MapWork) { + convertMapWork((MapWork) baseWork, true); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We are only vectorizing Reduce under Tez/Spark. + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); } } } @@ -463,22 +565,43 @@ public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) SparkWork sparkWork = (SparkWork) currTask.getWork(); for (BaseWork baseWork : sparkWork.getAllWork()) { if (baseWork instanceof MapWork) { - convertMapWork((MapWork) baseWork, false); - } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) baseWork, false); + convertMapWork((MapWork) baseWork, true); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); + } } } } + return null; } - private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException { + private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws SemanticException { + + mapWork.setVectorizationExamined(true); + + // Global used when setting errors, etc. + currentBaseWork = mapWork; + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); - boolean ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTez); + vectorTaskColumnInfo.assume(); + + boolean ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark); if (ret) { - vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTez); + vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark); + } else if (currentBaseWork.getVectorizationEnabled()) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + LOG.info("Cannot vectorize: unknown"); + } else { + LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); + } } } @@ -499,6 +622,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) LinkedHashMap> aliasToWork = mapWork.getAliasToWork(); if ((aliasToWork == null) || (aliasToWork.size() == 0)) { + setNodeIssue("Vectorized map work requires work"); return null; } int tableScanCount = 0; @@ -507,7 +631,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) for (Entry> entry : aliasToWork.entrySet()) { Operator op = entry.getValue(); if (op == null) { - LOG.warn("Map work has invalid aliases to work with. Fail validation!"); + setNodeIssue("Vectorized map work requires a valid alias"); return null; } if (op instanceof TableScanOperator) { @@ -517,7 +641,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) } } if (tableScanCount > 1) { - LOG.warn("Map work has more than 1 TableScanOperator. Fail validation!"); + setNodeIssue("Vectorized map work only works with 1 TableScanOperator"); return null; } return new ImmutablePair(alias, tableScanOperator); @@ -558,22 +682,6 @@ private void determineDataColumnNums(TableScanOperator tableScanOperator, } } - private String getHiveOptionsString() { - StringBuilder sb = new StringBuilder(); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); - sb.append("="); - sb.append(useVectorizedInputFileFormat); - sb.append(", "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); - sb.append("="); - sb.append(useVectorDeserialize); - sb.append(", and "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); - sb.append("="); - sb.append(useRowDeserialize); - return sb.toString(); - } - /* * There are 3 modes of reading for vectorization: * @@ -588,44 +696,58 @@ private String getHiveOptionsString() { * the row object into the VectorizedRowBatch with VectorAssignRow. 
* This picks up Input File Format not supported by the other two. */ - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable) { + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable, + HashSet inputFileFormatClassNameSet, HashSet enabledConditionsMetSet, + ArrayList enabledConditionsNotMetList) { String inputFileFormatClassName = pd.getInputFileFormatClassName(); + // Always collect input file formats. + inputFileFormatClassNameSet.add(inputFileFormatClassName); + + boolean isInputFileFormatVectorized = Utilities.isInputFileFormatVectorized(pd); + + if (isAcidTable) { + + // Today, ACID tables are only ORC and that format is vectorizable. Verify these + // assumptions. + Preconditions.checkState(isInputFileFormatVectorized); + Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName())); + + if (!useVectorizedInputFileFormat) { + enabledConditionsNotMetList.add( + "Vectorizing ACID tables requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return false; + } + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat( + inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return true; + } + // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (isAcidTable || useVectorizedInputFileFormat) { + if (useVectorizedInputFileFormat) { - if (Utilities.isInputFileFormatVectorized(pd)) { - - if (!useVectorizedInputFileFormat) { - LOG.info("ACID tables con only be vectorized for the input file format -- " + - "i.e. when Hive Configuration option " + - HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname + - "=true"); - return false; - } + if (isInputFileFormatVectorized) { pd.setVectorPartitionDesc( VectorPartitionDesc.createVectorizedInputFileFormat( inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); return true; } - - // Today, ACID tables are only ORC and that format is vectorizable. Verify this - // assumption. - Preconditions.checkState(!isAcidTable); + // Fall through and look for other options... } - if (!(isSchemaEvolution || isAcidTable) && - (useVectorDeserialize || useRowDeserialize)) { - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " when both " + HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION.varname + "=false and " + - " ACID table is " + isAcidTable + " and " + - " given the Hive Configuration options " + getHiveOptionsString()); - return false; + if (!isSchemaEvolution) { + enabledConditionsNotMetList.add( + "Vectorizing tables without Schema Evolution requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); } String deserializerClassName = pd.getDeserializerClassName(); @@ -635,6 +757,12 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the // VectorMapOperator. 
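// Illustrative sketch, not part of the patch: the cascade the three-modes comment above
// describes, tried from most to least efficient. The VectorPartitionDesc factory calls
// mirror the ones the patch makes; the helper itself and its boolean parameters are
// assumptions, and the real method also special-cases ACID tables and the
// LAST_COLUMN_TAKES_REST table property.
static VectorPartitionDesc chooseReadModeSketch(
    boolean useVectorizedInputFileFormat, boolean isInputFileFormatVectorized,
    boolean useVectorDeserialize, boolean isTextFormat, boolean isSequenceFormat,
    boolean useRowDeserialize, String inputFileFormatClassName,
    boolean isSelfDescribing, String deserializerClassName) {
  if (useVectorizedInputFileFormat && isInputFileFormatVectorized) {
    // 1) Pass-thru: the input format hands back VectorizedRowBatch as its "row".
    return VectorPartitionDesc.createVectorizedInputFileFormat(
        inputFileFormatClassName, isSelfDescribing);
  }
  if (useVectorDeserialize && (isTextFormat || isSequenceFormat)) {
    // 2) Deserialize each row directly into the batch's column vectors.
    return VectorPartitionDesc.createVectorDeserialize(inputFileFormatClassName,
        isTextFormat ? VectorDeserializeType.LAZY_SIMPLE : VectorDeserializeType.LAZY_BINARY);
  }
  if (useRowDeserialize) {
    // 3) Row-by-row with the regular SerDe, then VectorAssignRow into the batch.
    return VectorPartitionDesc.createRowDeserialize(
        inputFileFormatClassName, isSelfDescribing, deserializerClassName);
  }
  return null; // Not vectorizable; the caller records which conditions were not met.
}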
+ boolean isTextFormat = inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName()); + boolean isSequenceFormat = + inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName()); + boolean isVectorDeserializeEligable = isTextFormat || isSequenceFormat; if (useVectorDeserialize) { @@ -648,8 +776,7 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // org.apache.hadoop.mapred.SequenceFileInputFormat // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && - deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + if (isTextFormat) { Properties properties = pd.getTableDesc().getProperties(); String lastColumnTakesRestString = @@ -659,10 +786,11 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable lastColumnTakesRestString.equalsIgnoreCase("true")); if (lastColumnTakesRest) { - // If row mode will not catch this, then inform. + // If row mode will not catch this input file format, then not enabled. if (useRowDeserialize) { - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " when " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + "is true"); + enabledConditionsNotMetList.add( + inputFileFormatClassName + " " + + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + " must be disabled "); return false; } } else { @@ -670,17 +798,19 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable VectorPartitionDesc.createVectorDeserialize( inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); return true; } - } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && - deserializerClassName.equals(LazyBinarySerDe.class.getName())) { + } else if (isSequenceFormat) { pd.setVectorPartitionDesc( VectorPartitionDesc.createVectorDeserialize( inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); return true; } + // Fall through and look for other options... } // Otherwise, if enabled, deserialize rows using regular Serde and add the object @@ -694,17 +824,29 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable Utilities.isInputFileFormatSelfDescribing(pd), deserializerClassName)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); return true; } - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " given the Hive Configuration options " + getHiveOptionsString()); - + if (isInputFileFormatVectorized) { + Preconditions.checkState(!useVectorizedInputFileFormat); + enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + } else { + // Only offer these when the input file format is not the fast vectorized formats. + if (isVectorDeserializeEligable) { + Preconditions.checkState(!useVectorDeserialize); + enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); + } else { + // Since row mode takes everyone. 
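// A minimal, self-contained illustration of the "conditions met / not met" bookkeeping
// threaded through verifyAndSetVectorPartDesc (hypothetical class; the real code passes
// the raw collections and uses HiveConf variable names as the condition strings).
class EnabledConditionsSketch {
  final java.util.Set<String> met = new java.util.HashSet<String>();
  final java.util.List<String> notMet = new java.util.ArrayList<String>();

  void record(String conditionName, boolean isMet) {
    if (isMet) {
      met.add(conditionName);    // A set: many partitions can meet the same condition.
    } else {
      notMet.add(conditionName); // A list: preserves the order in which conditions failed.
    }
  }
}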
+ enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); + } + } + return false; } - private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, + private ImmutablePair validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { @@ -732,27 +874,39 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al LinkedHashMap> pathToAliases = mapWork.getPathToAliases(); LinkedHashMap pathToPartitionInfo = mapWork.getPathToPartitionInfo(); + + // Remember the input file formats we validated and why. + HashSet inputFileFormatClassNameSet = new HashSet(); + HashSet enabledConditionsMetSet = new HashSet(); + ArrayList enabledConditionsNotMetList = new ArrayList(); + for (Entry> entry: pathToAliases.entrySet()) { Path path = entry.getKey(); List aliases = entry.getValue(); boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1); if (!isPresent) { - LOG.info("Alias " + alias + " not present in aliases " + aliases); - return false; + setOperatorIssue("Alias " + alias + " not present in aliases " + aliases); + return new ImmutablePair(false, false); } PartitionDesc partDesc = pathToPartitionInfo.get(path); if (partDesc.getVectorPartitionDesc() != null) { // We have seen this already. continue; } - if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable)) { - return false; + if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable, inputFileFormatClassNameSet, + enabledConditionsMetSet, enabledConditionsNotMetList)) { + + // Always set these so EXPLAIN can see. + mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); + mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet)); + mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); + + // We consider this an enable issue, not a not vectorized issue. + LOG.info("Cannot enable vectorization because input file format(s) " + inputFileFormatClassNameSet + + " do not meet conditions " + VectorizationCondition.addBooleans(enabledConditionsNotMetList, false)); + return new ImmutablePair(false, true); } VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); - if (LOG.isInfoEnabled()) { - LOG.info("Vectorizer path: " + path + ", " + vectorPartDesc.toString() + - ", aliases " + aliases); - } if (isFirst) {
*/ if (nextDataColumnList.size() > tableDataColumnList.size()) { - LOG.info( + setOperatorIssue( String.format( "Could not vectorize partition %s " + "(deserializer " + deserializer.getClass().getName() + ")" + "The number of partition columns %d is greater than the number of table columns %d", path, nextDataColumnList.size(), tableDataColumnList.size())); - return false; + return new ImmutablePair(false, false); } if (!(deserializer instanceof NullStructSerDe)) { @@ -811,13 +965,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al String nextColumnName = nextDataColumnList.get(i); String tableColumnName = tableDataColumnList.get(i); if (!nextColumnName.equals(tableColumnName)) { - LOG.info( + setOperatorIssue( String.format( "Could not vectorize partition %s " + "(deserializer " + deserializer.getClass().getName() + ")" + "The partition column name %s does not match table column name %s", path, nextColumnName, tableColumnName)); - return false; + return new ImmutablePair(false, false); } } } @@ -852,29 +1006,50 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al // Helps to keep this for debugging. vectorTaskColumnInfo.setTableScanOperator(tableScanOperator); - return true; + // Always set these so EXPLAIN can see. + mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); + mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet)); + mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); + + return new ImmutablePair(true, false); } - private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) + private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTezOrSpark) throws SemanticException { LOG.info("Validating MapWork..."); - ImmutablePair pair = verifyOnlyOneTableScanOperator(mapWork); - if (pair == null) { + ImmutablePair onlyOneTableScanPair = verifyOnlyOneTableScanOperator(mapWork); + if (onlyOneTableScanPair == null) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + Preconditions.checkState(notVectorizedReason != null); + mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); return false; } - String alias = pair.left; - TableScanOperator tableScanOperator = pair.right; + String alias = onlyOneTableScanPair.left; + TableScanOperator tableScanOperator = onlyOneTableScanPair.right; // This call fills in the column names, types, and partition column count in // vectorTaskColumnInfo. - if (!validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo)) { + currentOperator = tableScanOperator; + ImmutablePair validateInputFormatAndSchemaEvolutionPair = + validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo); + if (!validateInputFormatAndSchemaEvolutionPair.left) { + // Have we already set the enabled conditions not met? + if (!validateInputFormatAndSchemaEvolutionPair.right) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + Preconditions.checkState(notVectorizedReason != null); + mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); + } return false; } + // Now we are enabled and any issues found from here on out are considered + // not vectorized issues.
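// Sketch condensing the decode just shown (assumed simplified signature): in the pair
// returned by validateInputFormatAndSchemaEvolution, left means "validated OK" and
// right means "the enabled-conditions-not-met list was already attached to the MapWork",
// so the recorded VectorizerReason is only promoted when right is false.
static boolean decodePairSketch(ImmutablePair<Boolean, Boolean> pair, MapWork mapWork,
    BaseWork currentBaseWork) {
  if (pair.left) {
    return true;  // Proceed to vectorizeMapWork().
  }
  if (!pair.right) {
    // Plain validation failure: surface the reason under the EXPLAIN
    // enabled-conditions-not-met heading as well.
    VectorizerReason reason = currentBaseWork.getNotVectorizedReason();
    mapWork.setVectorizationEnabledConditionsNotMet(
        java.util.Arrays.asList(new String[] {reason.toString()}));
  }
  return false;
}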
+ mapWork.setVectorizationEnabled(true); + Map opRules = new LinkedHashMap(); - MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez); + MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTezOrSpark); addMapWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -896,13 +1071,13 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask } private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, - boolean isTez) throws SemanticException { + boolean isTezOrSpark) throws SemanticException { LOG.info("Vectorizing MapWork..."); mapWork.setVectorMode(true); Map opRules = new LinkedHashMap(); MapWorkVectorizationNodeProcessor vnp = - new MapWorkVectorizationNodeProcessor(mapWork, isTez, vectorTaskColumnInfo); + new MapWorkVectorizationNodeProcessor(mapWork, isTezOrSpark, vectorTaskColumnInfo); addMapWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new PreOrderOnceWalker(disp); @@ -923,11 +1098,34 @@ private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskCo return; } - private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException { + private void setReduceWorkExplainConditions(ReduceWork reduceWork) { + + reduceWork.setVectorizationExamined(true); + + reduceWork.setReduceVectorizationEnabled(isReduceVectorizationEnabled); + reduceWork.setVectorReduceEngine( + HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); + } + + private void convertReduceWork(ReduceWork reduceWork) throws SemanticException { + + // Global used when setting errors, etc. + currentBaseWork = reduceWork; + currentBaseWork.setVectorizationEnabled(true); + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); - boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo, isTez); + vectorTaskColumnInfo.assume(); + + boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo); if (ret) { - vectorizeReduceWork(reduceWork, vectorTaskColumnInfo, isTez); + vectorizeReduceWork(reduceWork, vectorTaskColumnInfo); + } else if (currentBaseWork.getVectorizationEnabled()) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + LOG.info("Cannot vectorize: unknown"); + } else { + LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); + } } } @@ -941,13 +1139,14 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, // Check key ObjectInspector. ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector(); if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) { + setNodeIssue("Key object inspector missing or not StructObjectInspector"); return false; } StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector; List keyFields = keyStructObjectInspector.getAllStructFieldRefs(); - // Tez doesn't use tagging... 
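// Condensed sketch (illustrative helper, not in the patch) of the reduce-side shape
// checks in getOnlyStructObjectInspectors: both key and value must arrive as
// StructObjectInspector, and tagging is not supported; in the real code each failing
// check records a node issue so EXPLAIN can report it.
static boolean reduceShapeOkSketch(ObjectInspector keyObjectInspector,
    ObjectInspector valueObjectInspector, boolean needsTagging) {
  return keyObjectInspector instanceof StructObjectInspector
      && valueObjectInspector instanceof StructObjectInspector
      && !needsTagging;  // instanceof is false for null, covering the missing case.
}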
if (reduceWork.getNeedsTagging()) { + setNodeIssue("Tez doesn't use tagging"); return false; } @@ -955,6 +1154,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector(); if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) { + setNodeIssue("Value object inspector missing or not StructObjectInspector"); return false; } StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector; @@ -984,7 +1184,7 @@ private void addReduceWorkRules(Map opRules, NodeProcessor } private boolean validateReduceWork(ReduceWork reduceWork, - VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException { + VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { LOG.info("Validating ReduceWork..."); @@ -1015,7 +1215,7 @@ private boolean validateReduceWork(ReduceWork reduceWork, } private void vectorizeReduceWork(ReduceWork reduceWork, - VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException { + VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { LOG.info("Vectorizing ReduceWork..."); reduceWork.setVectorMode(true); @@ -1025,7 +1225,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork, // VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker? Map opRules = new LinkedHashMap(); ReduceWorkVectorizationNodeProcessor vnp = - new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo, isTez); + new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new PreOrderWalker(disp); @@ -1053,7 +1253,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork, class MapWorkValidationNodeProcessor implements NodeProcessor { private final MapWork mapWork; - private final boolean isTez; + private final boolean isTezOrSpark; // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batches.
protected final Set> nonVectorizedOps = @@ -1063,9 +1263,9 @@ private void vectorizeReduceWork(ReduceWork reduceWork, return nonVectorizedOps; } - public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) { + public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTezOrSpark) { this.mapWork = mapWork; - this.isTez = isTez; + this.isTezOrSpark = isTezOrSpark; } @Override @@ -1077,13 +1277,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return new Boolean(true); } boolean ret; + currentOperator = op; try { - ret = validateMapWorkOperator(op, mapWork, isTez); + ret = validateMapWorkOperator(op, mapWork, isTezOrSpark); } catch (Exception e) { throw new SemanticException(e); } if (!ret) { - LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); } // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't @@ -1119,9 +1319,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (nonVectorizedOps.contains(op)) { return new Boolean(true); } + currentOperator = op; boolean ret = validateReduceWorkOperator(op); if (!ret) { - LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); } // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't @@ -1142,9 +1342,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // The vectorization context for the Map or Reduce task. protected VectorizationContext taskVectorizationContext; + protected final VectorTaskColumnInfo vectorTaskColumnInfo; protected final Set> nonVectorizedOps; - VectorizationNodeProcessor(Set> nonVectorizedOps) { + VectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo, + Set> nonVectorizedOps) { + this.vectorTaskColumnInfo = vectorTaskColumnInfo; this.nonVectorizedOps = nonVectorizedOps; } @@ -1192,11 +1395,11 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac } public Operator doVectorize(Operator op, - VectorizationContext vContext, boolean isTez) throws SemanticException { + VectorizationContext vContext, boolean isTezOrSpark) throws SemanticException { Operator vectorOp = op; try { if (!opsDone.contains(op)) { - vectorOp = vectorizeOperator(op, vContext, isTez); + vectorOp = vectorizeOperator(op, vContext, isTezOrSpark, vectorTaskColumnInfo); opsDone.add(op); if (vectorOp != op) { opToVectorOpMap.put(op, vectorOp); @@ -1220,14 +1423,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private final MapWork mWork; private final VectorTaskColumnInfo vectorTaskColumnInfo; - private final boolean isTez; + private final boolean isTezOrSpark; - public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez, + public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) { - super(vectorTaskColumnInfo.getNonVectorizedOps()); + super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps()); this.mWork = mWork; this.vectorTaskColumnInfo = vectorTaskColumnInfo; - this.isTez = isTez; + this.isTezOrSpark = isTezOrSpark; } @Override @@ -1241,6 +1444,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, VectorizationContext vContext = null; + currentOperator = op; if (op instanceof TableScanOperator) { if (taskVectorizationContext == null) { taskVectorizationContext = getVectorizationContext(op.getName(), vectorTaskColumnInfo); @@ -1261,7 +1465,7 @@ 
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + " using vectorization context" + vContext.toString()); } - Operator vectorOp = doVectorize(op, vContext, isTez); + Operator vectorOp = doVectorize(op, vContext, isTezOrSpark); if (LOG.isDebugEnabled()) { if (vectorOp instanceof VectorizationContextRegion) { @@ -1279,7 +1483,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private final VectorTaskColumnInfo vectorTaskColumnInfo; - private final boolean isTez; private Operator rootVectorOp; @@ -1287,13 +1490,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return rootVectorOp; } - public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo, - boolean isTez) { + public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo) { - super(vectorTaskColumnInfo.getNonVectorizedOps()); + super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps()); this.vectorTaskColumnInfo = vectorTaskColumnInfo; rootVectorOp = null; - this.isTez = isTez; } @Override @@ -1309,6 +1510,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean saveRootVectorOp = false; + currentOperator = op; if (op.getParentOperators().size() == 0) { LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + vectorTaskColumnInfo.allColumnNames.toString()); @@ -1333,7 +1535,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, assert vContext != null; LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); - Operator vectorOp = doVectorize(op, vContext, isTez); + Operator vectorOp = doVectorize(op, vContext, true); if (LOG.isDebugEnabled()) { if (vectorOp instanceof VectorizationContextRegion) { @@ -1390,6 +1592,10 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE); + isReduceVectorizationEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -1407,18 +1613,32 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } - boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { - boolean ret = false; + private void setOperatorNotSupported(Operator op) { + OperatorDesc desc = op.getConf(); + Annotation note = AnnotationUtils.getAnnotation(desc.getClass(), Explain.class); + if (note != null) { + Explain explainNote = (Explain) note; + setNodeIssue(explainNote.displayName() + " (" + op.getType() + ") not supported"); + } else { + setNodeIssue("Operator " + op.getType() + " not supported"); + } + } + + boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTezOrSpark) { + boolean ret; switch (op.getType()) { case MAPJOIN: if (op instanceof MapJoinOperator) { ret = validateMapJoinOperator((MapJoinOperator) op); } else if (op instanceof SMBMapJoinOperator) { ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op); + } else { + setOperatorNotSupported(op); + ret = false; } break; case GROUPBY: - ret = validateGroupByOperator((GroupByOperator) op, false, isTez); + ret = validateGroupByOperator((GroupByOperator) op, false, isTezOrSpark); break; case FILTER: ret = validateFilterOperator((FilterOperator) op); @@ -1443,6 +1663,7 @@ 
boolean validateMapWorkOperator(Operator op, MapWork mWo validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; default: + setOperatorNotSupported(op); ret = false; break; } @@ -1450,7 +1671,7 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo } boolean validateReduceWorkOperator(Operator op) { - boolean ret = false; + boolean ret; switch (op.getType()) { case MAPJOIN: // Does MAPJOIN actually get planned in Reduce? @@ -1458,6 +1679,9 @@ boolean validateReduceWorkOperator(Operator op) { ret = validateMapJoinOperator((MapJoinOperator) op); } else if (op instanceof SMBMapJoinOperator) { ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op); + } else { + setOperatorNotSupported(op); + ret = false; } break; case GROUPBY: @@ -1465,6 +1689,7 @@ boolean validateReduceWorkOperator(Operator op) { HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) { ret = validateGroupByOperator((GroupByOperator) op, true, true); } else { + setNodeIssue("Operator " + op.getType() + " not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED.name() + "=true IS false)"); ret = false; } break; @@ -1490,6 +1715,7 @@ boolean validateReduceWorkOperator(Operator op) { validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; default: + setOperatorNotSupported(op); ret = false; break; } @@ -1512,7 +1738,7 @@ private Boolean isVectorizedGroupByThatOutputsRows(Operator filterExprs = desc.getFilters().get(posBigTable); - if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) { - LOG.info("Cannot vectorize map work filter expression"); + if (!validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER)) { return false; } List keyExprs = desc.getKeys().get(posBigTable); - if (!validateExprNodeDesc(keyExprs)) { - LOG.info("Cannot vectorize map work key expression"); + if (!validateExprNodeDesc(keyExprs, "Key")) { return false; } List valueExprs = desc.getExprs().get(posBigTable); - if (!validateExprNodeDesc(valueExprs)) { - LOG.info("Cannot vectorize map work value expression"); + if (!validateExprNodeDesc(valueExprs, "Value")) { return false; } Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? 
order[1] : order[0]); List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if (!validateExprNodeDesc(smallTableExprs)) { - LOG.info("Cannot vectorize map work small table expression"); + if (!validateExprNodeDesc(smallTableExprs, "Small Table")) { return false; } return true; @@ -1571,24 +1794,23 @@ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op List filterExprs = desc.getFilters().get(tag); List keyExprs = desc.getKeys().get(tag); List valueExprs = desc.getExprs().get(tag); - return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) && - validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs); + return validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER) && + validateExprNodeDesc(keyExprs, "Key") && validateExprNodeDesc(valueExprs, "Value"); } private boolean validateReduceSinkOperator(ReduceSinkOperator op) { List keyDescs = op.getConf().getKeyCols(); List partitionDescs = op.getConf().getPartitionCols(); List valueDesc = op.getConf().getValueCols(); - return validateExprNodeDesc(keyDescs) && validateExprNodeDesc(partitionDescs) && - validateExprNodeDesc(valueDesc); + return validateExprNodeDesc(keyDescs, "Key") && validateExprNodeDesc(partitionDescs, "Partition") && + validateExprNodeDesc(valueDesc, "Value"); } private boolean validateSelectOperator(SelectOperator op) { List descList = op.getConf().getColList(); for (ExprNodeDesc desc : descList) { - boolean ret = validateExprNodeDesc(desc); + boolean ret = validateExprNodeDesc(desc, "Select"); if (!ret) { - LOG.info("Cannot vectorize select expression: " + desc.toString()); return false; } } @@ -1597,28 +1819,26 @@ private boolean validateSelectOperator(SelectOperator op) { private boolean validateFilterOperator(FilterOperator op) { ExprNodeDesc desc = op.getConf().getPredicate(); - return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER); + return validateExprNodeDesc(desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER); } - private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) { + private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTezOrSpark) { GroupByDesc desc = op.getConf(); - VectorGroupByDesc vectorDesc = desc.getVectorDesc(); if (desc.isGroupingSetsPresent()) { - LOG.info("Grouping sets not supported in vector mode"); + setOperatorIssue("Grouping sets not supported"); return false; } if (desc.pruneGroupingSetId()) { - LOG.info("Pruning grouping set id not supported in vector mode"); + setOperatorIssue("Pruning grouping set id not supported"); return false; } if (desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) { - LOG.info("DISTINCT not supported in vector mode"); + setOperatorIssue("DISTINCT not supported"); return false; } - boolean ret = validateExprNodeDesc(desc.getKeys()); + boolean ret = validateExprNodeDesc(desc.getKeys(), "Key"); if (!ret) { - LOG.info("Cannot vectorize groupby key expression " + desc.getKeys().toString()); return false; } @@ -1731,6 +1951,9 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo // If all the aggregation outputs are primitive, we can output VectorizedRowBatch. // Otherwise, the rest of the operator tree will be in row mode.
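// Sketch of the test the comment above describes (assumed simplified signature):
// GROUP BY can emit vectorized row batches only when every aggregation output is
// primitive; avg() partials, for example, are structs and force row-mode output.
static boolean allAggregationOutputsPrimitiveSketch(
    java.util.List<ObjectInspector.Category> aggregationOutputCategories) {
  for (ObjectInspector.Category category : aggregationOutputCategories) {
    if (category != ObjectInspector.Category.PRIMITIVE) {
      return false; // Row-mode output; downstream operators stay in row mode.
    }
  }
  return true;
}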
+ VectorGroupByDesc vectorDesc = new VectorGroupByDesc(); + desc.setVectorDesc(vectorDesc); + vectorDesc.setVectorOutput(retPair.right); vectorDesc.setProcessingMode(processingMode); @@ -1745,14 +1968,15 @@ private boolean validateFileSinkOperator(FileSinkOperator op) { return true; } - private boolean validateExprNodeDesc(List descs) { - return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION); + private boolean validateExprNodeDesc(List descs, String expressionTitle) { + return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } private boolean validateExprNodeDesc(List descs, + String expressionTitle, VectorExpressionDescriptor.Mode mode) { for (ExprNodeDesc d : descs) { - boolean ret = validateExprNodeDesc(d, mode); + boolean ret = validateExprNodeDesc(d, expressionTitle, mode); if (!ret) { return false; } @@ -1775,19 +1999,20 @@ private boolean validateExprNodeDesc(List descs, return new Pair(true, outputIsPrimitive); } - private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { + private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { - LOG.info("Cannot vectorize virtual column " + c.getColumn()); + setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")"); return false; } } String typeName = desc.getTypeInfo().getTypeName(); boolean ret = validateDataType(typeName, mode); if (!ret) { - LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName); + setExpressionIssue(expressionTitle, "Data type " + typeName + " of " + desc.toString() + " not supported"); return false; } boolean isInExpression = false; @@ -1795,7 +2020,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc; boolean r = validateGenericUdf(d); if (!r) { - LOG.info("Cannot vectorize UDF " + d); + setExpressionIssue(expressionTitle, "UDF " + d + " not supported"); return false; } GenericUDF genericUDF = d.getGenericUDF(); @@ -1806,14 +2031,14 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. - if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { for (ExprNodeDesc d : desc.getChildren()) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. 
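// Condensed skeleton (illustrative only) of the recursive validation as revised here:
// the expressionTitle ("Key", "Value", "Select", "Predicate", ...) is threaded through
// so setExpressionIssue can name which expression of the operator failed, and children
// are always checked in the looser FILTER mode.
private boolean validateRecursiveSketch(ExprNodeDesc desc, String expressionTitle) {
  if (!validateDataType(desc.getTypeInfo().getTypeName(),
      VectorExpressionDescriptor.Mode.FILTER)) {
    setExpressionIssue(expressionTitle, "Data type not supported");
    return false;
  }
  if (desc.getChildren() != null) {
    for (ExprNodeDesc child : desc.getChildren()) {
      if (!validateRecursiveSketch(child, expressionTitle)) {
        return false; // The failing child already recorded the issue.
      }
    }
  }
  return true;
}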
- if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } @@ -1823,7 +2048,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio } private boolean validateStructInExpression(ExprNodeDesc desc, - VectorExpressionDescriptor.Mode mode) { + String expressionTitle, VectorExpressionDescriptor.Mode mode) { for (ExprNodeDesc d : desc.getChildren()) { TypeInfo typeInfo = d.getTypeInfo(); if (typeInfo.getCategory() != Category.STRUCT) { @@ -1839,7 +2064,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc, TypeInfo fieldTypeInfo = fieldTypeInfos.get(f); Category category = fieldTypeInfo.getCategory(); if (category != Category.PRIMITIVE) { - LOG.info("Cannot vectorize struct field " + fieldNames.get(f) + setExpressionIssue(expressionTitle, + "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName()); return false; } @@ -1852,7 +2078,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc, if (inConstantType != InConstantType.INT_FAMILY && inConstantType != InConstantType.FLOAT_FAMILY && inConstantType != InConstantType.STRING_FAMILY) { - LOG.info("Cannot vectorize struct field " + fieldNames.get(f) + setExpressionIssue(expressionTitle, + "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName()); return false; } @@ -1861,31 +2088,28 @@ private boolean validateStructInExpression(ExprNodeDesc desc, return true; } - private boolean validateExprNodeDesc(ExprNodeDesc desc) { - return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION); + private boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle) { + return validateExprNodeDesc(desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } - boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { - if (!validateExprNodeDescRecursive(desc, mode)) { + boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode) { + if (!validateExprNodeDescRecursive(desc, expressionTitle, mode)) { return false; } try { VectorizationContext vc = new ValidatorVectorizationContext(hiveConf); if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getVectorExpression returned null"); + setExpressionIssue(expressionTitle, "getVectorExpression returned null"); return false; } } catch (Exception e) { if (e instanceof HiveException) { - LOG.info(e.getMessage()); + setExpressionIssue(expressionTitle, e.getMessage()); } else { - if (LOG.isDebugEnabled()) { - // Show stack trace. 
- LOG.debug("Failed to vectorize", e); - } else { - LOG.info("Failed to vectorize", e.getMessage()); - } + String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e); + setExpressionIssue(expressionTitle, issue); } return false; } @@ -1905,9 +2129,9 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { } } - private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) { + public static ObjectInspector.Category aggregationOutputCategory(VectorAggregateExpression vectorAggrExpr) { ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); - return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE); + return outputObjInspector.getCategory(); } private Pair validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, @@ -1915,11 +2139,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); + setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported"); return new Pair(false, false); } - if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); + if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) { return new Pair(false, false); } @@ -1933,6 +2156,7 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA if (LOG.isDebugEnabled()) { LOG.debug("Vectorization of aggregation should have succeeded ", e); } + setExpressionIssue("Aggregation Function", "Vectorization of aggregation should have succeeded " + e); return new Pair(false, false); } if (LOG.isDebugEnabled()) { @@ -1940,11 +2164,12 @@ " vector expression " + vectorAggrExpr.toString()); } - boolean outputIsPrimitive = validateAggregationIsPrimitive(vectorAggrExpr); + ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr); + boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE); if (processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys && !outputIsPrimitive) { - LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); + setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); return new Pair(false, false); } @@ -2012,12 +2237,12 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { if (smallTableIndices[i] < 0) { // Negative numbers indicate a column to be (deserialize) read from the small table's // LazyBinary value row.
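// The index encoding the surrounding comments rely on, in one place (illustrative):
// entry i of smallTableIndices describes output column i of the small-table result area.
//   smallTableIndices[i] >= 0  ->  big-table key column, keyIndex = smallTableIndices[i]
//   smallTableIndices[i] <  0  ->  small-table value column, valueIndex = -smallTableIndices[i] - 1
static int decodeSmallTableValueIndexSketch(int smallTableIndex) {
  // Only meaningful for negative entries; non-negative entries name a big-table key.
  return -smallTableIndex - 1;
}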
- LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false"); + setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false"); return false; } } } else if (smallTableRetainSize > 0) { - LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false"); + setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false"); return false; } @@ -2026,20 +2251,21 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { } Operator specializeMapJoinOperator(Operator op, - VectorizationContext vContext, MapJoinDesc desc) throws HiveException { + VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinInfo vectorMapJoinInfo) + throws HiveException { Operator vectorOp = null; Class> opClass = null; - VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; - VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE; - VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); + + HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; + HashTableKind hashTableKind = HashTableKind.NONE; + HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; + OperatorVariation operatorVariation = OperatorVariation.NONE; - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) { + if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; } else { - // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or - // HybridHashTableContainer. hashTableImplementationType = HashTableImplementationType.OPTIMIZED; } @@ -2061,20 +2287,31 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { Map> keyExprs = desc.getKeys(); List bigTableKeyExprs = keyExprs.get(posBigTable); if (bigTableKeyExprs.size() == 1) { - String typeName = bigTableKeyExprs.get(0).getTypeString(); - LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName); - if (typeName.equals("boolean")) { + TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo(); + LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName()); + switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { + case BOOLEAN: hashTableKeyType = HashTableKeyType.BOOLEAN; - } else if (typeName.equals("tinyint")) { + break; + case BYTE: hashTableKeyType = HashTableKeyType.BYTE; - } else if (typeName.equals("smallint")) { + break; + case SHORT: hashTableKeyType = HashTableKeyType.SHORT; - } else if (typeName.equals("int")) { + break; + case INT: hashTableKeyType = HashTableKeyType.INT; - } else if (typeName.equals("bigint") || typeName.equals("long")) { + break; + case LONG: hashTableKeyType = HashTableKeyType.LONG; - } else if (VectorizationContext.isStringFamily(typeName)) { + break; + case STRING: + case CHAR: + case VARCHAR: + case BINARY: hashTableKeyType = HashTableKeyType.STRING; + break; + default: + // Stay with multi-key.
} } } @@ -2082,16 +2319,20 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { switch (joinType) { case JoinDesc.INNER_JOIN: if (!isInnerBigOnly) { + operatorVariation = OperatorVariation.INNER; hashTableKind = HashTableKind.HASH_MAP; } else { + operatorVariation = OperatorVariation.INNER_BIG_ONLY; hashTableKind = HashTableKind.HASH_MULTISET; } break; case JoinDesc.LEFT_OUTER_JOIN: case JoinDesc.RIGHT_OUTER_JOIN: + operatorVariation = OperatorVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; case JoinDesc.LEFT_SEMI_JOIN: + operatorVariation = OperatorVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; break; default: @@ -2106,86 +2347,84 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case SHORT: case INT: case LONG: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerLongOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyLongOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerLongOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterLongOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyLongOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiLongOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterLongOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; case STRING: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerStringOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyStringOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerStringOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterStringOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyStringOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiStringOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterStringOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; case MULTI_KEY: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerMultiKeyOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerMultiKeyOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterMultiKeyOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiMultiKeyOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterMultiKeyOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); } - vectorOp = OperatorFactory.getVectorOperator( - opClass, 
op.getCompilationOpContext(), op.getConf(), vContext); - LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName()); - boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); vectorDesc.setHashTableImplementationType(hashTableImplementationType); vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); + vectorDesc.setOperatorVariation(operatorVariation); vectorDesc.setMinMaxEnabled(minMaxEnabled); + vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), op.getConf(), vContext); + LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName()); + return vectorOp; } - private boolean onExpressionHasNullSafes(MapJoinDesc desc) { + public static boolean onExpressionHasNullSafes(MapJoinDesc desc) { boolean[] nullSafes = desc.getNullSafes(); if (nullSafes == null) { - return false; + return false; } for (boolean nullSafe : nullSafes) { if (nullSafe) { @@ -2196,53 +2435,372 @@ private boolean onExpressionHasNullSafes(MapJoinDesc desc) { } private boolean canSpecializeMapJoin(Operator op, MapJoinDesc desc, - boolean isTez) { + boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo) + throws HiveException { + + Preconditions.checkState(op instanceof MapJoinOperator); + + // Allocate a VectorMapJoinDesc initially with implementation type NONE so EXPLAIN + // can report this operator was vectorized, but not native. And, the conditions. + VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc(); + desc.setVectorDesc(vectorDesc); + + boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED); + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + boolean oneMapJoinCondition = (desc.getConds().length == 1); + + boolean hasNullSafes = onExpressionHasNullSafes(desc); + + byte posBigTable = (byte) desc.getPosBigTable(); + + // Since we want to display all the met and not met conditions in EXPLAIN, we determine all + // information first.... + + List keyDesc = desc.getKeys().get(posBigTable); + VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); + final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; + boolean isEmptyKey = (allBigTableKeyExpressionsLength == 0); - boolean specialize = false; + boolean supportsKeyTypes = true; // Assume. + HashSet notSupportedKeyTypes = new HashSet(); - if (op instanceof MapJoinOperator && + // Since a key expression can be a calculation and the key will go into a scratch column, + // we need the mapping and type information.
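// Illustration (hypothetical helper condensing the loop bodies below): a key or value
// that is a bare column is read straight from its batch column, while a computed
// expression (e.g. key + 1) is evaluated into a scratch column first; either way the
// join records the expression's output column, and only non-identity expressions are
// kept for evaluation.
static int batchColumnForExpressionSketch(VectorExpression ve,
    java.util.List<VectorExpression> toEvaluate) {
  if (!IdentityExpression.isColumnOnly(ve)) {
    toEvaluate.add(ve);         // Must run before the join can read this column.
  }
  return ve.getOutputColumn();  // Original column or scratch column, either way.
}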
+ int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength]; + String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength]; + TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength]; + ArrayList bigTableKeyExpressionsList = new ArrayList(); + VectorExpression[] bigTableKeyExpressions; + for (int i = 0; i < allBigTableKeyExpressionsLength; i++) { + VectorExpression ve = allBigTableKeyExpressions[i]; + if (!IdentityExpression.isColumnOnly(ve)) { + bigTableKeyExpressionsList.add(ve); + } + bigTableKeyColumnMap[i] = ve.getOutputColumn(); + + ExprNodeDesc exprNode = keyDesc.get(i); + bigTableKeyColumnNames[i] = exprNode.toString(); + + TypeInfo typeInfo = exprNode.getTypeInfo(); + // Verify we handle the key column types for an optimized table. This is effectively the + // same check used in HashTableLoader. + if (!MapJoinKey.isSupportedField(typeInfo)) { + supportsKeyTypes = false; + Category category = typeInfo.getCategory(); + notSupportedKeyTypes.add( + (category != Category.PRIMITIVE ? category.toString() : + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString())); + } + bigTableKeyTypeInfos[i] = typeInfo; + } + if (bigTableKeyExpressionsList.size() == 0) { + bigTableKeyExpressions = null; + } else { + bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); + } + + List bigTableExprs = desc.getExprs().get(posBigTable); + VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); + + boolean isFastHashTableEnabled = + HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) { + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED); + vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled); - // Currently, only under Tez and non-N-way joins. - if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) { + // Especially since LLAP is prone to turn it off in the MapJoinDesc in later + // physical optimizer stages... + boolean isHybridHashJoin = desc.isHybridHashJoin(); + vectorDesc.setIsHybridHashJoin(isHybridHashJoin); - // Ok, all basic restrictions satisfied so far... - specialize = true; + /* + * Populate vectorMapJoinInfo. + */ - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) { + /* + * Similarly, we need a mapping since a value expression can be a calculation and the value + * will go into a scratch column. + */ + int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; + String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; + TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length]; + ArrayList bigTableValueExpressionsList = new ArrayList(); + VectorExpression[] bigTableValueExpressions; + for (int i = 0; i < bigTableValueColumnMap.length; i++) { + VectorExpression ve = allBigTableValueExpressions[i]; + if (!IdentityExpression.isColumnOnly(ve)) { + bigTableValueExpressionsList.add(ve); + } + bigTableValueColumnMap[i] = ve.getOutputColumn(); - // We are using the optimized hash table we have further - // restrictions (using optimized and key type).
+ ExprNodeDesc exprNode = bigTableExprs.get(i); + bigTableValueColumnNames[i] = exprNode.toString(); + bigTableValueTypeInfos[i] = exprNode.getTypeInfo(); + } + if (bigTableValueExpressionsList.size() == 0) { + bigTableValueExpressions = null; + } else { + bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]); + } - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) { - specialize = false; - } else { - byte posBigTable = (byte) desc.getPosBigTable(); - Map> keyExprs = desc.getKeys(); - List bigTableKeyExprs = keyExprs.get(posBigTable); - for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) { - String typeName = exprNodeDesc.getTypeString(); - if (!MapJoinKey.isSupportedField(typeName)) { - specialize = false; - break; - } + vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap); + vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames); + vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos); + vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions); + + vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap); + vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames); + vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos); + vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions); + + /* + * Small table information. + */ + VectorColumnOutputMapping bigTableRetainedMapping = + new VectorColumnOutputMapping("Big Table Retained Mapping"); + + VectorColumnOutputMapping bigTableOuterKeyMapping = + new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + + // The order of the fields in the LazyBinary small table value must be used, so + // we use the source ordering flavor for the mapping. + VectorColumnSourceMapping smallTableMapping = + new VectorColumnSourceMapping("Small Table Mapping"); + + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + boolean isOuterJoin = !desc.getNoOuterJoin(); + + /* + * Gather up big and small table output result information from the MapJoinDesc. + */ + List bigTableRetainList = desc.getRetainList().get(posBigTable); + int bigTableRetainSize = bigTableRetainList.size(); + + int[] smallTableIndices; + int smallTableIndicesSize; + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); + smallTableIndicesSize = smallTableIndices.length; + } else { + smallTableIndices = null; + smallTableIndicesSize = 0; + } + + List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); + int smallTableRetainSize = smallTableRetainList.size(); + + int smallTableResultSize = 0; + if (smallTableIndicesSize > 0) { + smallTableResultSize = smallTableIndicesSize; + } else if (smallTableRetainSize > 0) { + smallTableResultSize = smallTableRetainSize; + } + + /* + * Determine the big table retained mapping first so we can optimize out (with + * projection) copying inner join big table keys in the subsequent small table results section. + */ + + // We use a mapping object here so we can build the projection in any order and + // get the ordered by 0 to n-1 output columns at the end. 
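// Worked example (column numbers assumed) of the mapping object just described:
// entries can be added in any output order and may reference the same batch column
// more than once; finalize() later emits the projection ordered by output column 0..n-1.
//
//   projectionMapping.add(1, 2, keyTypeInfo);    // output 1 <- big-table key, batch column 2
//   projectionMapping.add(0, 5, valueTypeInfo);  // output 0 <- big-table value, batch column 5
//
// After finalize(), the projection reads [batch column 5, batch column 2] for outputs [0, 1].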
+ // + // Also, to avoid copying a big table key into the small table result area for inner joins, + // we reference it with the projection so there can be duplicate output columns + // in the projection. + VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); + + int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + for (int i = 0; i < bigTableRetainSize; i++) { + + // Since bigTableValueExpressions may do a calculation and produce a scratch column, we + // need to map to the right batch column. + + int retainColumn = bigTableRetainList.get(i); + int batchColumnIndex = bigTableValueColumnMap[retainColumn]; + TypeInfo typeInfo = bigTableValueTypeInfos[i]; + + // With this map we project the big table batch to make it look like an output batch. + projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); + + // Collect columns we copy from the big table batch to the overflow batch. + if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + // Tolerate repeated use of a big table column. + bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + } + + nextOutputColumn++; + } + + /* + * Now determine the small table results. + */ + boolean smallTableExprVectorizes = true; + + int firstSmallTableOutputColumn; + firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0); + int smallTableOutputCount = 0; + nextOutputColumn = firstSmallTableOutputColumn; + + // Small table indices has more information (i.e. keys) than retain, so use it if it exists... + String[] bigTableRetainedNames; + if (smallTableIndicesSize > 0) { + smallTableOutputCount = smallTableIndicesSize; + bigTableRetainedNames = new String[smallTableOutputCount]; + + for (int i = 0; i < smallTableIndicesSize; i++) { + if (smallTableIndices[i] >= 0) { + + // Zero and above numbers indicate a big table key is needed for + // small table result "area". + + int keyIndex = smallTableIndices[i]; + + // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we + // need to map the right column. + int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; + bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; + + if (!isOuterJoin) { + + // Optimize inner join keys of small table results. + + // Project the big table key into the small table result "area". + projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + + if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { + // If necessary, copy the big table key into the overflow batch's small table + // result "area". + bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); } + } else { + + // For outer joins, since the small table key can be null when there is no match, + // we must have a physical (scratch) column for those keys. We cannot use the + // projection optimization used by inner joins above. + + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + + bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); } } else { - // With the fast hash table implementation, we currently do not support - // Hybrid Grace Hash Join. + // Negative numbers indicate a column to be (deserialize) read from the small table's + // LazyBinary value row. 
+ int smallTableValueIndex = -smallTableIndices[i] - 1; - if (desc.isHybridHashJoin()) { - specialize = false; + ExprNodeDesc smallTableExprNode = smallTableExprs.get(i); + if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) { + clearNotVectorizedReason(); + smallTableExprVectorizes = false; } + + bigTableRetainedNames[i] = smallTableExprNode.toString(); + + TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); + + // Make a new big table scratch column for the small table value. + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + } + nextOutputColumn++; + } + } else if (smallTableRetainSize > 0) { + smallTableOutputCount = smallTableRetainSize; + bigTableRetainedNames = new String[smallTableOutputCount]; + + // Only small table values appear in join output result. + + for (int i = 0; i < smallTableRetainSize; i++) { + int smallTableValueIndex = smallTableRetainList.get(i); + + ExprNodeDesc smallTableExprNode = smallTableExprs.get(i); + if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) { + clearNotVectorizedReason(); + smallTableExprVectorizes = false; } + + bigTableRetainedNames[i] = smallTableExprNode.toString(); + + // Make a new big table scratch column for the small table value. + TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + nextOutputColumn++; } + } else { + bigTableRetainedNames = new String[0]; + } + + // Remember the condition variables for EXPLAIN regardless. + vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled); + vectorDesc.setEngine(engine); + vectorDesc.setOneMapJoinCondition(oneMapJoinCondition); + vectorDesc.setHasNullSafes(hasNullSafes); + vectorDesc.setSupportsKeyTypes(supportsKeyTypes); + if (!supportsKeyTypes) { + vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes)); + } + vectorDesc.setIsEmptyKey(isEmptyKey); + vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes); + + // Currently, only under Tez and non-N-way joins. + if (!isVectorizationMapJoinNativeEnabled || + !isTezOrSpark || + !oneMapJoinCondition || + hasNullSafes || + !supportsKeyTypes || + isEmptyKey || + !smallTableExprVectorizes) { + return false; } - return specialize; + + if (!isFastHashTableEnabled) { + + // No restrictions for optimized hash table. + + } else { + + // With the fast hash table implementation, we currently do not support + // Hybrid Grace Hash Join. + + if (isHybridHashJoin) { + return false; + } + } + + // Convert dynamic arrays and maps to simple arrays. + + bigTableRetainedMapping.finalize(); + + bigTableOuterKeyMapping.finalize(); + + smallTableMapping.finalize(); + + vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); + vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); + vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + + projectionMapping.finalize(); + + // Verify we added an entry for each output. 
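Stepping back to the outer-join branch above: a probe can miss, so the small-table key slot must be able to hold NULL, which is why a physical scratch column is allocated there instead of aliasing the big-table key through the projection as the inner-join branch does. A toy illustration of that difference:

// Why an outer join needs a real scratch column for keys: on a miss the
// key slot must become NULL, so it cannot simply alias the big-table key.
public class OuterKeyNullDemo {
  public static void main(String[] args) {
    long[] bigTableKey = { 10, 20, 30 };
    boolean[] matched = { true, false, true };
    long[] scratchKey = new long[3];
    boolean[] scratchIsNull = new boolean[3];
    for (int i = 0; i < bigTableKey.length; i++) {
      if (matched[i]) {
        scratchKey[i] = bigTableKey[i];  // key is copied on a match
      } else {
        scratchIsNull[i] = true;         // key slot is NULL on a miss
      }
    }
    for (int i = 0; i < 3; i++) {
      System.out.println(scratchIsNull[i] ? "NULL" : Long.toString(scratchKey[i]));
    }
  }
}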
+ assert projectionMapping.isSourceSequenceGood(); + + vectorMapJoinInfo.setProjectionMapping(projectionMapping); + + return true; } private Operator specializeReduceSinkOperator( @@ -2302,8 +2860,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType); } - VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc(); - desc.setVectorDesc(vectorDesc); + VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc(); + vectorDesc.setReduceSinkKeyType(reduceSinkKeyType); vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo); @@ -2315,51 +2873,60 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } private boolean canSpecializeReduceSink(ReduceSinkDesc desc, - boolean isTez, VectorizationContext vContext, + boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException { - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED)) { - return false; - } + // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this + // operator was vectorized, but not native. And, the conditions. + VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc(); + desc.setVectorDesc(vectorDesc); - // Many restrictions. + boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED); - if (!isTez) { - return false; - } + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); - if (desc.getWriteType() == AcidUtils.Operation.UPDATE || - desc.getWriteType() == AcidUtils.Operation.DELETE) { - return false; - } + boolean acidChange = + desc.getWriteType() == AcidUtils.Operation.UPDATE || + desc.getWriteType() == AcidUtils.Operation.DELETE; - if (desc.getBucketCols() != null && !desc.getBucketCols().isEmpty()) { - return false; - } + boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty(); - boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM); - if (!useUniformHash) { - return false; - } + boolean hasTopN = desc.getTopN() >= 0; - if (desc.getTopN() >= 0) { - return false; - } + boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM); - if (desc.getDistinctColumnIndices().size() > 0) { - return false; - } + boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0; TableDesc keyTableDesc = desc.getKeySerializeInfo(); Class keySerializerClass = keyTableDesc.getDeserializerClass(); - if (keySerializerClass != org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class) { - return false; - } + boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class); TableDesc valueTableDesc = desc.getValueSerializeInfo(); Class valueDeserializerClass = valueTableDesc.getDeserializerClass(); - if (valueDeserializerClass != org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class) { + boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class); + + // Remember the condition variables for EXPLAIN regardless. 
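canSpecializeMapJoin above and canSpecializeReduceSink below share one shape: compute every condition unconditionally, record all of them on the vector desc so EXPLAIN can report them even when specialization fails, and only then apply a single combined gate. A compressed sketch of that shape, with hypothetical names:

// Sketch of the record-then-gate pattern used by both canSpecialize
// methods in this patch. Names here are illustrative only.
final class NativeGateSketch {
  boolean nativeEnabled, engineSupported, keyBinarySortable;

  interface ExplainRecorder { void record(String name, boolean met); }

  boolean canSpecialize(ExplainRecorder recorder) {
    // 1. Every condition is computed up front (no early returns yet).
    // 2. All of them are recorded for EXPLAIN, met or not.
    recorder.record("native enabled", nativeEnabled);
    recorder.record("engine supported", engineSupported);
    recorder.record("key BinarySortable", keyBinarySortable);
    // 3. One combined gate at the end, mirroring the multi-clause if.
    return nativeEnabled && engineSupported && keyBinarySortable;
  }
}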
+ vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled); + vectorDesc.setEngine(engine); + vectorDesc.setAcidChange(acidChange); + vectorDesc.setHasBuckets(hasBuckets); + vectorDesc.setHasTopN(hasTopN); + vectorDesc.setUseUniformHash(useUniformHash); + vectorDesc.setHasDistinctColumns(hasDistinctColumns); + vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable); + vectorDesc.setIsValueLazyBinary(isValueLazyBinary); + + // Many restrictions. + if (!isVectorizationReduceSinkNativeEnabled || + !isTezOrSpark || + acidChange || + hasBuckets || + hasTopN || + !useUniformHash || + hasDistinctColumns || + !isKeyBinarySortable || + !isValueLazyBinary) { return false; } @@ -2429,21 +2996,136 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, return true; } - Operator vectorizeOperator(Operator op, - VectorizationContext vContext, boolean isTez) throws HiveException { + private boolean usesVectorUDFAdaptor(VectorExpression vecExpr) { + if (vecExpr == null) { + return false; + } + if (vecExpr instanceof VectorUDFAdaptor) { + return true; + } + if (usesVectorUDFAdaptor(vecExpr.getChildExpressions())) { + return true; + } + return false; + } + + private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { + if (vecExprs == null) { + return false; + } + for (VectorExpression vecExpr : vecExprs) { + if (usesVectorUDFAdaptor(vecExpr)) { + return true; + } + } + return false; + } + + public static Operator vectorizeTableScanOperator( + Operator tableScanOp, VectorizationContext vContext) + throws HiveException { + TableScanDesc tableScanDesc = (TableScanDesc) tableScanOp.getConf(); + VectorTableScanDesc vectorTableScanDesc = new VectorTableScanDesc(); + tableScanDesc.setVectorDesc(vectorTableScanDesc); + vectorTableScanDesc.setProjectedOutputColumns( + ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0]))); + return tableScanOp; + } + + public static Operator vectorizeFilterOperator( + Operator filterOp, VectorizationContext vContext) + throws HiveException { + FilterDesc filterDesc = (FilterDesc) filterOp.getConf(); + VectorFilterDesc vectorFilterDesc = new VectorFilterDesc(); + filterDesc.setVectorDesc(vectorFilterDesc); + ExprNodeDesc predicateExpr = filterDesc.getPredicate(); + VectorExpression vectorPredicateExpr = + vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER); + vectorFilterDesc.setPredicateExpression(vectorPredicateExpr); + return OperatorFactory.getVectorOperator( + filterOp.getCompilationOpContext(), filterDesc, vContext); + } + + /* + * NOTE: The VectorGroupByDesc has already been allocated and partially populated. + */ + public static Operator vectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext) + throws HiveException { + GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf(); + List keysDesc = groupByDesc.getKeys(); + VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc); + ArrayList aggrDesc = groupByDesc.getAggregators(); + final int size = aggrDesc.size(); + VectorAggregateExpression[] vecAggregators = new VectorAggregateExpression[size]; + int[] projectedOutputColumns = new int[size]; + for (int i = 0; i < size; ++i) { + AggregationDesc aggDesc = aggrDesc.get(i); + vecAggregators[i] = vContext.getAggregatorExpression(aggDesc); + + // GroupBy generates a new vectorized row batch... 
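The usesVectorUDFAdaptor overloads above do a plain recursive scan of an expression tree; this toy version shows the same traversal finding an adaptor below the root:

// Tiny model of the recursive adaptor scan (types hypothetical): an
// expression tree is flagged if any node in it is an adaptor.
public class AdaptorScanDemo {
  static class Expr {
    final Expr[] children;
    final boolean isAdaptor;
    Expr(boolean isAdaptor, Expr... children) {
      this.isAdaptor = isAdaptor;
      this.children = children;
    }
  }

  static boolean usesAdaptor(Expr e) {
    if (e == null) return false;
    if (e.isAdaptor) return true;
    for (Expr child : e.children) {
      if (usesAdaptor(child)) return true;
    }
    return false;
  }

  public static void main(String[] args) {
    // The adaptor sits one level below the root and is still found.
    Expr tree = new Expr(false, new Expr(true));
    System.out.println(usesAdaptor(tree));  // true
  }
}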
+ projectedOutputColumns[i] = i; + } + VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) groupByDesc.getVectorDesc(); + vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); + vectorGroupByDesc.setAggregators(vecAggregators); + vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); + return OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), groupByDesc, vContext); + } + + public static Operator vectorizeSelectOperator( + Operator selectOp, VectorizationContext vContext) + throws HiveException { + SelectDesc selectDesc = (SelectDesc) selectOp.getConf(); + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selectDesc.setVectorDesc(vectorSelectDesc); + List colList = selectDesc.getColList(); + int index = 0; + final int size = colList.size(); + VectorExpression[] vectorSelectExprs = new VectorExpression[size]; + int[] projectedOutputColumns = new int[size]; + for (int i = 0; i < size; i++) { + ExprNodeDesc expr = colList.get(i); + VectorExpression ve = vContext.getVectorExpression(expr); + projectedOutputColumns[i] = ve.getOutputColumn(); + if (ve instanceof IdentityExpression) { + // Suppress useless evaluation. + continue; + } + vectorSelectExprs[index++] = ve; + } + if (index < size) { + vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index); + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns); + return OperatorFactory.getVectorOperator( + selectOp.getCompilationOpContext(), selectDesc, vContext); + } + + public Operator vectorizeOperator(Operator op, + VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) + throws HiveException { Operator vectorOp = null; + boolean isNative; switch (op.getType()) { + case TABLESCAN: + vectorOp = vectorizeTableScanOperator(op, vContext); + isNative = true; + break; case MAPJOIN: { - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - boolean specialize = canSpecializeMapJoin(op, desc, isTez || isSpark); - - if (!specialize) { - - Class> opClass = null; - if (op instanceof MapJoinOperator) { - + if (op instanceof MapJoinOperator) { + VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo(); + MapJoinDesc desc = (MapJoinDesc) op.getConf(); + boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo); + + if (!specialize) { + + Class> opClass = null; + // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered... List bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable()); @@ -2453,20 +3135,36 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, } else { opClass = VectorMapJoinOuterFilteredOperator.class; } - } else if (op instanceof SMBMapJoinOperator) { - opClass = VectorSMBMapJoinOperator.class; + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), op.getConf(), vContext); + isNative = false; + } else { + + // TEMPORARY Until Native Vector Map Join with Hybrid passes tests... 
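In vectorizeSelectOperator above, an IdentityExpression contributes its output column but is never evaluated, and the expression array is then trimmed with Arrays.copyOf. The same compaction in isolation:

import java.util.Arrays;

// Mirrors the identity-suppression loop: null stands in for an
// IdentityExpression whose column is already correct as-is.
public class IdentityTrimDemo {
  public static void main(String[] args) {
    String[] exprs = { "CastLongToDouble(col 1)", null /* identity, skipped */,
                       "LongColAddLongScalar(col 2, 5)" };
    String[] kept = new String[exprs.length];
    int index = 0;
    for (String e : exprs) {
      if (e == null) continue;           // suppress useless evaluation
      kept[index++] = e;
    }
    if (index < kept.length) {
      kept = Arrays.copyOf(kept, index); // trim to the expressions kept
    }
    System.out.println(Arrays.toString(kept));
  }
}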
+ // HiveConf.setBoolVar(physicalContext.getConf(), + // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false); + + vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo); + isNative = true; + + if (vectorTaskColumnInfo != null) { + if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } } - - vectorOp = OperatorFactory.getVectorOperator( - opClass, op.getCompilationOpContext(), op.getConf(), vContext); - } else { - - // TEMPORARY Until Native Vector Map Join with Hybrid passes tests... - // HiveConf.setBoolVar(physicalContext.getConf(), - // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false); - - vectorOp = specializeMapJoinOperator(op, vContext, desc); + Preconditions.checkState(op instanceof SMBMapJoinOperator); + SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf(); + VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc(); + smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), smbJoinSinkDesc, vContext); + isNative = false; } } break; @@ -2475,38 +3173,134 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, { VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo(); ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); - boolean specialize = canSpecializeReduceSink(desc, isTez, vContext, vectorReduceSinkInfo); + boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo); if (!specialize) { vectorOp = OperatorFactory.getVectorOperator( op.getCompilationOpContext(), op.getConf(), vContext); - + isNative = false; } else { vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo); + isNative = true; + if (vectorTaskColumnInfo != null) { + if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } } } break; - case GROUPBY: case FILTER: + { + vectorOp = vectorizeFilterOperator(op, vContext); + isNative = true; + if (vectorTaskColumnInfo != null) { + VectorFilterDesc vectorFilterDesc = + (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression(); + if (usesVectorUDFAdaptor(vectorPredicateExpr)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + break; case SELECT: + { + vectorOp = vectorizeSelectOperator(op, vContext); + isNative = true; + if (vectorTaskColumnInfo != null) { + VectorSelectDesc vectorSelectDesc = + (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions(); + if (usesVectorUDFAdaptor(vectorSelectExprs)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + break; + case GROUPBY: + { + vectorOp = vectorizeGroupByOperator(op, vContext); + isNative = false; + if (vectorTaskColumnInfo != null) { + VectorGroupByDesc vectorGroupByDesc = + (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + if (!vectorGroupByDesc.isVectorOutput()) { + vectorTaskColumnInfo.setGroupByVectorOutput(false); + 
} + VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); + if (usesVectorUDFAdaptor(vecKeyExpressions)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + for (VectorAggregateExpression vecAggr : vecAggregators) { + if (usesVectorUDFAdaptor(vecAggr.inputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + + } + break; case FILESINK: + { + FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf(); + VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc(); + fileSinkDesc.setVectorDesc(vectorFileSinkDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), fileSinkDesc, vContext); + isNative = false; + } + break; case LIMIT: - case EXTRACT: + { + LimitDesc limitDesc = (LimitDesc) op.getConf(); + VectorLimitDesc vectorLimitDesc = new VectorLimitDesc(); + limitDesc.setVectorDesc(vectorLimitDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), limitDesc, vContext); + isNative = true; + } + break; case EVENT: + { + AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf(); + VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc(); + eventDesc.setVectorDesc(vectorEventDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), eventDesc, vContext); + isNative = true; + } + break; case HASHTABLESINK: - vectorOp = OperatorFactory.getVectorOperator( - op.getCompilationOpContext(), op.getConf(), vContext); + { + SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf(); + VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc(); + sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext); + isNative = true; + } break; default: + // These are children of GROUP BY operators with non-vector outputs. + isNative = false; vectorOp = op; break; } + Preconditions.checkState(vectorOp != null); + if (vectorTaskColumnInfo != null && !isNative) { + vectorTaskColumnInfo.setAllNative(false); + } - LOG.debug("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName())); - LOG.debug("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ? "NULL" : vectorOp.getConf().getClass().getName())); + LOG.debug("vectorizeOperator " + vectorOp.getClass().getName()); + LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName()); if (vectorOp != op) { fixupParentChildOperators(op, vectorOp); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java new file mode 100644 index 0000000..e0a6198 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * Why a node did not vectorize. + * + */ +public class VectorizerReason { + + private static long serialVersionUID = 1L; + + public static enum VectorizerNodeIssue { + NONE, + NODE_ISSUE, + OPERATOR_ISSUE, + EXPRESSION_ISSUE + } + + private final VectorizerNodeIssue vectorizerNodeIssue; + + private final Operator operator; + + private final String expressionTitle; + + private final String issue; + + private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue, + Operator operator, String expressionTitle, String issue) { + this.vectorizerNodeIssue = vectorizerNodeIssue; + this.operator = operator; + this.expressionTitle = expressionTitle; + this.issue = issue; + } + + public static VectorizerReason createNodeIssue(String issue) { + return new VectorizerReason( + VectorizerNodeIssue.NODE_ISSUE, + null, + null, + issue); + } + + public static VectorizerReason createOperatorIssue(Operator operator, + String issue) { + return new VectorizerReason( + VectorizerNodeIssue.OPERATOR_ISSUE, + operator, + null, + issue); + } + + public static VectorizerReason createExpressionIssue(Operator operator, + String expressionTitle, String issue) { + return new VectorizerReason( + VectorizerNodeIssue.EXPRESSION_ISSUE, + operator, + expressionTitle, + issue); + } + + @Override + public VectorizerReason clone() { + return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue); + } + + public VectorizerNodeIssue getVectorizerNodeIssue() { + return vectorizerNodeIssue; + } + + public Operator getOperator() { + return operator; + } + + public String getExpressionTitle() { + return expressionTitle; + } + + public String getIssue() { + return issue; + } + + @Override + public String toString() { + String reason; + switch (vectorizerNodeIssue) { + case NODE_ISSUE: + reason = (issue == null ? "unknown" : issue); + break; + case OPERATOR_ISSUE: + reason = (operator == null ? "Unknown" : operator.getType()) + " operator: " + + (issue == null ? "unknown" : issue); + break; + case EXPRESSION_ISSUE: + reason = expressionTitle + " expression for " + + (operator == null ? "Unknown" : operator.getType()) + " operator: " + + (issue == null ? 
"unknown" : issue); + break; + default: + reason = "Unknown " + vectorizerNodeIssue; + } + return reason; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 4a8ff15..1f118dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -27,12 +27,27 @@ */ public class ExplainConfiguration { + + public enum VectorizationDetailLevel { + + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1); + + public final int rank; + VectorizationDetailLevel(int rank) { + this.rank = rank; + } + }; + private boolean extended = false; private boolean formatted = false; private boolean dependency = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; + private boolean vectorization = false; + private boolean vectorizationOnly = false; + private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; + private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -98,6 +113,30 @@ public void setUserLevelExplain(boolean userLevelExplain) { this.userLevelExplain = userLevelExplain; } + public boolean isVectorization() { + return vectorization; + } + + public void setVectorization(boolean vectorization) { + this.vectorization = vectorization; + } + + public boolean isVectorizationOnly() { + return vectorizationOnly; + } + + public void setVectorizationOnly(boolean vectorizationOnly) { + this.vectorizationOnly = vectorizationOnly; + } + + public VectorizationDetailLevel getVectorizationDetailLevel() { + return vectorizationDetailLevel; + } + + public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDetailLevel) { + this.vectorizationDetailLevel = vectorizationDetailLevel; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 300542e..f62cf9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -70,7 +71,9 @@ public ExplainSemanticAnalyzer(QueryState queryState) throws SemanticException { @SuppressWarnings("unchecked") @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - for (int i = 1; i < ast.getChildCount(); i++) { + final int childCount = ast.getChildCount(); + int i = 1; // Skip TOK_QUERY. 
+    while (i < childCount) {
       int explainOptions = ast.getChild(i).getType();
       if (explainOptions == HiveParser.KW_FORMATTED) {
         config.setFormatted(true);
@@ -85,7 +88,40 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       } else if (explainOptions == HiveParser.KW_ANALYZE) {
         config.setAnalyze(AnalyzeState.RUNNING);
         config.setExplainRootPath(ctx.getMRTmpPath());
+      } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
+        config.setVectorization(true);
+        if (i + 1 < childCount) {
+          int vectorizationOption = ast.getChild(i + 1).getType();
+
+          // [ONLY]
+          if (vectorizationOption == HiveParser.TOK_ONLY) {
+            config.setVectorizationOnly(true);
+            i++;
+            if (i + 1 >= childCount) {
+              break;
+            }
+            vectorizationOption = ast.getChild(i + 1).getType();
+          }
+
+          // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
+          if (vectorizationOption == HiveParser.TOK_SUMMARY) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
+            i++;
+          } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
+            config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
+            i++;
+          }
+        }
+      } else {
+        // UNDONE: UNKNOWN OPTION?
       }
+      i++;
     }

     ctx.setExplainConfig(config);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 5d3fa6a..025ea10 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -335,6 +335,11 @@ KW_KEY: 'KEY';
 KW_ABORT: 'ABORT';
 KW_EXTRACT: 'EXTRACT';
 KW_FLOOR: 'FLOOR';
+KW_VECTORIZATION: 'VECTORIZATION';
+KW_SUMMARY: 'SUMMARY';
+KW_OPERATOR: 'OPERATOR';
+KW_EXPRESSION: 'EXPRESSION';
+KW_DETAIL: 'DETAIL';

 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 5c16c55..eebd875 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -380,6 +380,11 @@ TOK_ROLLBACK;
 TOK_SET_AUTOCOMMIT;
 TOK_CACHE_METADATA;
 TOK_ABORT_TRANSACTIONS;
+TOK_ONLY;
+TOK_SUMMARY;
+TOK_OPERATOR;
+TOK_EXPRESSION;
+TOK_DETAIL;
 }
@@ -717,7 +722,28 @@ explainStatement
 explainOption
 @init { msgs.push("explain option"); }
 @after { msgs.pop(); }
-    : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE
+    : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE|
+      (KW_VECTORIZATION vectorizationOnly? vectorizationDetail?)
+    ;
+
+vectorizationOnly
+@init { pushMsg("vectorization's only clause", state); }
+@after { popMsg(state); }
+    : KW_ONLY
+    -> ^(TOK_ONLY)
+    ;
+
+vectorizationDetail
+@init { pushMsg("vectorization's detail level clause", state); }
+@after { popMsg(state); }
+    : KW_SUMMARY
+    -> ^(TOK_SUMMARY)
+    | KW_OPERATOR
+    -> ^(TOK_OPERATOR)
+    | KW_EXPRESSION
+    -> ^(TOK_EXPRESSION)
+    | KW_DETAIL
+    -> ^(TOK_DETAIL)
     ;

 execStatement
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 50987c3..13f6879 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -719,6 +719,12 @@ nonReserved
     | KW_VALIDATE
     | KW_NOVALIDATE
     | KW_KEY
+    | KW_VECTORIZATION
+    | KW_SUMMARY
+    | KW_OPERATOR
+    | KW_EXPRESSION
+    | KW_DETAIL
+
 ;

 //The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index e217bdf..2c14203 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -29,6 +29,10 @@ public class AbstractOperatorDesc implements OperatorDesc {

   protected boolean vectorMode = false;
+
+  // Extra parameters only for vectorization.
+  protected VectorDesc vectorDesc;
+
   protected Statistics statistics;

   protected transient OpTraits opTraits;
   protected transient Map opProps;
@@ -64,6 +68,14 @@ public void setVectorMode(boolean vm) {
     this.vectorMode = vm;
   }

+  public void setVectorDesc(VectorDesc vectorDesc) {
+    this.vectorDesc = vectorDesc;
+  }
+
+  public VectorDesc getVectorDesc() {
+    return vectorDesc;
+  }
+
   @Override
   public OpTraits getTraits() {
     return opTraits;
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
index 5157ebd..4304b11 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
@@ -18,10 +18,24 @@

 package org.apache.hadoop.hive.ql.plan;

+import org.apache.hadoop.hive.ql.exec.Operator;
+
 public class AbstractVectorDesc implements VectorDesc {

+  private static final long serialVersionUID = 1L;
+
+  private Class vectorOpClass;
+
   @Override
   public Object clone() throws CloneNotSupportedException {
     throw new CloneNotSupportedException("clone not supported");
   }
+
+  public void setVectorOp(Class vectorOpClass) {
+    this.vectorOpClass = vectorOpClass;
+  }
+
+  public Class getVectorOpClass() {
+    return vectorOpClass;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
index 264f959..c5294f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java
@@ -19,7 +19,10 @@
 package org.apache.hadoop.hive.ql.plan;

 import java.io.IOException;
+import java.util.List;

+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.io.DataOutputBuffer;

@@ -60,4 +63,25 @@ public void setTable(TableDesc table) {
   public void writeEventHeader(DataOutputBuffer buffer) throws IOException {
     // nothing to add
   }
+
+  public class 
AppMasterEventOperatorExplainVectorization extends OperatorExplainVectorization { + + private final AppMasterEventDesc appMasterEventDesc; + private final VectorAppMasterEventDesc vectorAppMasterEventDesc; + + public AppMasterEventOperatorExplainVectorization(AppMasterEventDesc appMasterEventDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.appMasterEventDesc = appMasterEventDesc; + vectorAppMasterEventDesc = (VectorAppMasterEventDesc) vectorDesc; + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() { + if (vectorDesc == null) { + return null; + } + return new AppMasterEventOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 13a0811..b061d5e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.LinkedHashSet; import java.util.List; @@ -33,7 +34,9 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -65,12 +68,25 @@ public BaseWork(String name) { private String name; - // Vectorization. + /* + * Vectorization. + */ + + // This will be true if a node was examined by the Vectorizer class. + protected boolean vectorizationExamined; + + protected boolean vectorizationEnabled; protected VectorizedRowBatchCtx vectorizedRowBatchCtx; protected boolean useVectorizedInputFileFormat; + private VectorizerReason notVectorizedReason; + + private boolean groupByVectorOutput; + private boolean allNative; + private boolean usesVectorUDFAdaptor; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -163,6 +179,22 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { // ----------------------------------------------------------------------------------------------- + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public void setVectorizationEnabled(boolean vectorizationEnabled) { + this.vectorizationEnabled = vectorizationEnabled; + } + + public boolean getVectorizationEnabled() { + return vectorizationEnabled; + } + /* * The vectorization context for creating the VectorizedRowBatch for the node. */ @@ -174,23 +206,160 @@ public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } - /* - * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable - * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated - * vectorizing this node. 
- * - * When Vectorized Input File Format looks at this flag, it can determine whether it should - * operate vectorized or not. In some modes, the node can be vectorized but use row - * serialization. - */ - public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { - this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + public void setNotVectorizedReason(VectorizerReason notVectorizedReason) { + this.notVectorizedReason = notVectorizedReason; + } + + public VectorizerReason getNotVectorizedReason() { + return notVectorizedReason; + } + + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + + public boolean getGroupByVectorOutput() { + return groupByVectorOutput; + } + + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; } - public boolean getUseVectorizedInputFileFormat() { - return useVectorizedInputFileFormat; + public boolean getUsesVectorUDFAdaptor() { + return usesVectorUDFAdaptor; } + public void setAllNative(boolean allNative) { + this.allNative = allNative; + } + + public boolean getAllNative() { + return allNative; + } + + public static class BaseExplainVectorization { + + private final BaseWork baseWork; + + public BaseExplainVectorization(BaseWork baseWork) { + this.baseWork = baseWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return baseWork.getVectorizationEnabled(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean vectorized() { + if (!baseWork.getVectorizationEnabled()) { + return null; + } + return baseWork.getVectorMode(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String notVectorizedReason() { + if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) { + return null; + } + VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + return "Unknown"; + } + return notVectorizedReason.toString(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean groupByRowOutputCascade() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getGroupByVectorOutput(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean nativeVectorized() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getAllNative(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean usesVectorUDFAdaptor() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getUsesVectorUDFAdaptor(); + } + + public static class RowBatchContextExplainVectorization { + + private final VectorizedRowBatchCtx vectorizedRowBatchCtx; + + public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) { + this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; + } + + private List getColumns(int startIndex, int count) { + String[] rowColumnNames = 
vectorizedRowBatchCtx.getRowColumnNames(); + TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); + List result = new ArrayList(count); + final int end = startIndex + count; + for (int i = startIndex; i < end; i++) { + result.add(rowColumnNames[i] + ":" + rowColumnTypeInfos[i]); + } + return result; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getDataColumns() { + return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getPartitionColumns() { + return getColumns(vectorizedRowBatchCtx.getDataColumnCount(), vectorizedRowBatchCtx.getPartitionColumnCount()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getDataColumnNums() { + int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums(); + if (dataColumnNums == null) { + return null; + } + return Arrays.toString(vectorizedRowBatchCtx.getDataColumnNums()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public int getDataColumnCount() { + return vectorizedRowBatchCtx.getDataColumnCount(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public int getPartitionColumnCount() { + return vectorizedRowBatchCtx.getPartitionColumnCount(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getScratchColumnTypeNames() { + return Arrays.asList(vectorizedRowBatchCtx.getScratchColumnTypeNames()); + } + + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public RowBatchContextExplainVectorization vectorizedRowBatchContext() { + if (!baseWork.getVectorMode()) { + return null; + } + return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx()); + } + } + + // ----------------------------------------------------------------------------------------------- /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index b0b6c3a..7b16ad7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -45,4 +45,33 @@ public boolean in(Level[] levels) { boolean displayOnlyOnTrue() default false; boolean skipHeader() default false; + + // By default, many existing @Explain classes/methods are NON_VECTORIZED. + // + // Vectorized methods/classes have detail levels: + // SUMMARY, OPERATOR, EXPRESSION, or DETAIL. + // As you go to the right you get more detail and the information for the previous level(s) is + // included. The default is SUMMARY. + // + // The "path" enumerations are used to mark methods/classes that lead to vectorization specific + // ones so we can avoid displaying headers for things that have no vectorization information + // below. + // + // For example, the TezWork class is marked SUMMARY_PATH because it leads to both + // SUMMARY and OPERATOR methods/classes. 
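How these rank values are consumed is not part of this hunk; one plausible reading, stated here purely as an assumption, is that an annotated member is displayed when its rank is at least the rank of the requested detail level, so DETAIL(1) shows everything and SUMMARY(4) shows only summary-level entries:

// A guess at the rank comparison (the actual filtering code is not in
// this patch hunk); treat the shouldDisplay rule as an assumption.
public class RankFilterDemo {
  enum Vectorization {
    SUMMARY_PATH(4), OPERATOR_PATH(3),
    SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1),
    NON_VECTORIZED(Integer.MAX_VALUE);
    final int rank;
    Vectorization(int rank) { this.rank = rank; }
  }

  static boolean shouldDisplay(Vectorization annotated, Vectorization requested) {
    return annotated.rank >= requested.rank;  // assumed rule, see above
  }

  public static void main(String[] args) {
    System.out.println(shouldDisplay(Vectorization.EXPRESSION, Vectorization.SUMMARY)); // false
    System.out.println(shouldDisplay(Vectorization.EXPRESSION, Vectorization.DETAIL));  // true
  }
}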
And, MapWork.getAllRootOperators is marked OPERATOR_PATH + // because we only display operator information for OPERATOR. + // + // EXPRESSION and DETAIL typically live inside SUMMARY or OPERATOR classes. + // + public enum Vectorization { + SUMMARY_PATH(4), OPERATOR_PATH(3), + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1), + NON_VECTORIZED(Integer.MAX_VALUE); + + public final int rank; + Vectorization(int rank) { + this.rank = rank; + } + }; + Vectorization vectorization() default Vectorization.NON_VECTORIZED; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 9f4767c..805357c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -117,6 +118,18 @@ public boolean isFormatted() { return config.isFormatted(); } + public boolean isVectorization() { + return config.isVectorization(); + } + + public boolean isVectorizationOnly() { + return config.isVectorizationOnly(); + } + + public VectorizationDetailLevel isVectorizationDetailLevel() { + return config.getVectorizationDetailLevel(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 8ea6440..3c69f69 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; @@ -30,14 +31,17 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. 
* */ -@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; @@ -307,4 +311,43 @@ public String toString() { return ret; } + + // ----------------------------------------------------------------------------------------------- + + private boolean vectorizationExamined; + + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public class FetchExplainVectorization { + + private final FetchWork fetchWork; + + public FetchExplainVectorization(FetchWork fetchWork) { + this.fetchWork = fetchWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return false; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Fetch Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FetchExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new FetchExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 07ed4fd..bbc5f10 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * FileSinkDesc. @@ -474,4 +474,19 @@ public void setStatsTmpDir(String statsCollectionTempDir) { this.statsTmpDir = statsCollectionTempDir; } + public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) { + // Native vectorization not supported. 
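The getMapExplainVectorization getter above, and the per-operator getters that follow in this patch, all rely on one convention: return null and the whole section is omitted from the EXPLAIN output. A toy emitter showing the effect (the writer here is hypothetical, not Hive's actual explain machinery):

// Null means "suppress the section": nothing vectorization-related is
// printed for operators that were never examined or never vectorized.
public class NullSuppressionDemo {
  static void emit(String heading, Object section) {
    if (section == null) {
      return;               // suppressed entirely
    }
    System.out.println(heading + ": " + section);
  }

  public static void main(String[] args) {
    emit("File Sink Vectorization", null);          // prints nothing
    emit("Filter Vectorization", "predicate: ..."); // prints the section
  }
}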
+ super(vectorDesc, false); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "File Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FileSinkOperatorExplainVectorization getFileSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new FileSinkOperatorExplainVectorization(vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index fa20798..ff69775 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; @@ -177,6 +178,7 @@ public void setSyntheticJoinPredicate(boolean syntheticJoinPredicate) { this.syntheticJoinPredicate = syntheticJoinPredicate; } + @Override public Object clone() { FilterDesc filterDesc = new FilterDesc(getPredicate().clone(), getIsSamplingPred()); @@ -186,4 +188,30 @@ public Object clone() { filterDesc.setSortedFilter(isSortedFilter()); return filterDesc; } + + public class FilterOperatorExplainVectorization extends OperatorExplainVectorization { + + private final FilterDesc filterDesc; + private final VectorFilterDesc vectorFilterDesc; + + public FilterOperatorExplainVectorization(FilterDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorFilterDesc = (VectorFilterDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "predicateExpression", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getPredicateExpression() { + return vectorFilterDesc.getPredicateExpression().toString(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Filter Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FilterOperatorExplainVectorization getFilterVectorization() { + if (vectorDesc == null) { + return null; + } + return new FilterOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 99791e5..204277e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -19,13 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; /** @@ -71,11 +76,8 @@ transient private boolean isDistinct; private boolean dontResetAggrsDistinct; - // Extra parameters only for vectorization. 
- private VectorGroupByDesc vectorDesc; - public GroupByDesc() { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; } public GroupByDesc( @@ -106,7 +108,7 @@ public GroupByDesc( final boolean groupingSetsPresent, final int groupingSetsPosition, final boolean isDistinct) { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -120,14 +122,6 @@ public GroupByDesc( this.isDistinct = isDistinct; } - public void setVectorDesc(VectorGroupByDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorGroupByDesc getVectorDesc() { - return vectorDesc; - } - public Mode getMode() { return mode; } @@ -311,4 +305,66 @@ public void setDistinct(boolean isDistinct) { this.isDistinct = isDistinct; } + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { + + private final GroupByDesc groupByDesc; + private final VectorGroupByDesc vectorGroupByDesc; + + public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) { + // Native vectorization not supported. + super(vectorDesc, false); + this.groupByDesc = groupByDesc; + vectorGroupByDesc = (VectorGroupByDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeysExpression() { + return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getAggregators() { + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + List vecAggrList = new ArrayList(vecAggregators.length); + for (VectorAggregateExpression vecAggr : vecAggregators) { + vecAggrList.add(vecAggr.toString()); + } + return vecAggrList; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getGroupByRowOutputCascade() { + return vectorGroupByDesc.isVectorOutput(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getVectorOutputConditionsNotMet() { + List results = new ArrayList(); + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + for (VectorAggregateExpression vecAggr : vecAggregators) { + Category category = Vectorizer.aggregationOutputCategory(vecAggr); + if (category != ObjectInspector.Category.PRIMITIVE) { + results.add( + "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false"); + } + } + if (results.size() == 0) { + return null; + } + return results; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public GroupByOperatorExplainVectorization getGroupByVectorization() { + if (vectorDesc == null) { + return null; + } + return new GroupByOperatorExplainVectorization(this, vectorDesc); + } } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index 94ac41e..a338319 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java index 8448a41..45ec431 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java @@ -17,7 +17,10 @@ */ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -73,4 +76,19 @@ public void setLeastRows(int leastRows) { this.leastRows = leastRows; } + public class LimitOperatorExplainVectorization extends OperatorExplainVectorization { + + public LimitOperatorExplainVectorization(LimitDesc limitDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Limit Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public LimitOperatorExplainVectorization getLimitVectorization() { + if (vectorDesc == null) { + return null; + } + return new LimitOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index ec35860..3633fde 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -20,14 +20,24 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; /** * Map Join operator Descriptor implementation. @@ -73,17 +83,16 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; - // Extra parameters only for vectorization. 
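The VectorizationCondition helper used by the conditions arrays below is not defined in this hunk. A sketch of its minimal shape, inferred only from the call sites in this patch (a constructor taking a met flag and a display name, plus met/not-met partitioning), so treat it as an approximation rather than the actual class:

import java.util.ArrayList;
import java.util.List;

// Assumed minimal shape of VectorizationCondition; the "IS true/false"
// suffixes are illustrative of the EXPLAIN-style output, not verified.
public class VectorizationConditionSketch {
  private final boolean met;
  private final String name;

  public VectorizationConditionSketch(boolean met, String name) {
    this.met = met;
    this.name = name;
  }

  public static List<String> getConditionsMet(VectorizationConditionSketch[] conditions) {
    List<String> result = new ArrayList<>();
    for (VectorizationConditionSketch c : conditions) {
      if (c.met) {
        result.add(c.name + " IS true");
      }
    }
    return result;
  }

  public static List<String> getConditionsNotMet(VectorizationConditionSketch[] conditions) {
    List<String> result = new ArrayList<>();
    for (VectorizationConditionSketch c : conditions) {
      if (!c.met) {
        result.add(c.name + " IS false");
      }
    }
    return result;
  }
}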
- private VectorMapJoinDesc vectorDesc; - public MapJoinDesc() { - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; bigTableBucketNumMapping = new LinkedHashMap(); } public MapJoinDesc(MapJoinDesc clone) { super(clone); - vectorDesc = new VectorMapJoinDesc(clone.vectorDesc); + if (clone.vectorDesc != null) { + throw new RuntimeException("Clone with vectorization desc not supported"); + } this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; @@ -108,7 +117,7 @@ public MapJoinDesc(final Map> keys, final int posBigTable, final JoinCondDesc[] conds, final Map> filters, boolean noOuterJoin, String dumpFilePrefix) { super(values, outputColumnNames, noOuterJoin, conds, filters, null); - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; this.keys = keys; this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; @@ -119,14 +128,6 @@ public MapJoinDesc(final Map> keys, initRetainExprList(); } - public void setVectorDesc(VectorMapJoinDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorMapJoinDesc getVectorDesc() { - return vectorDesc; - } - private void initRetainExprList() { retainList = new HashMap>(); Set>> set = super.getExprs().entrySet(); @@ -388,4 +389,193 @@ public boolean isDynamicPartitionHashJoin() { public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + + // Use LinkedHashSet to give predictable display order. + private static Set vectorizableMapJoinNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class MapJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final MapJoinDesc mapJoinDesc; + private final VectorMapJoinDesc vectorMapJoinDesc; + private final VectorMapJoinInfo vectorMapJoinInfo; + + private VectorizationCondition[] nativeConditions; + + public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { + // VectorMapJoinOperator is not native vectorized. + super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); + this.mapJoinDesc = mapJoinDesc; + vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; + vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorMapJoinDesc.getIsVectorizationMapJoinNativeEnabled(); + + String engine = vectorMapJoinDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); + + boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); + boolean isHybridHashJoin = vectorMapJoinDesc.getIsHybridHashJoin(); + + boolean whenFastHashTableThenNoHybrid = + (!isFastHashTableEnabled ? 
true : !isHybridHashJoin); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + vectorMapJoinDesc.getOneMapJoinCondition(), + "One MapJoin Condition"), + new VectorizationCondition( + !vectorMapJoinDesc.getHasNullSafes(), + "No nullsafe"), + new VectorizationCondition( + vectorMapJoinDesc.getSupportsKeyTypes(), + "Supports Key Types"), + new VectorizationCondition( + !vectorMapJoinDesc.getIsEmptyKey(), + "Not empty key"), + new VectorizationCondition( + whenFastHashTableThenNoHybrid, + "When Fast Hash Table, then requires no Hybrid Hash Join"), + new VectorizationCondition( + vectorMapJoinDesc.getSmallTableExprVectorizes(), + "Small table vectorizes"), + }; + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableKeyColumns() { + if (!isNative) { + return null; + } + int[] bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + if (bigTableKeyColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableKeyColumnMap); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableValueColumns() { + if (!isNative) { + return null; + } + int[] bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + if (bigTableValueColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableValueColumnMap); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getSmallTableColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String 
getProjectedOutputColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableOuterKey() { + if (!isNative || vectorMapJoinDesc.operatorVariation() != OperatorVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeNotSupportedKeyTypes() { + return vectorMapJoinDesc.getNotSupportedKeyTypes(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapJoinOperatorExplainVectorization getMapJoinVectorization() { + if (vectorDesc == null || this instanceof SMBJoinDesc) { + return null; + } + return new MapJoinOperatorExplainVectorization(this, vectorDesc); + } + + public class SMBJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SMBJoinDesc smbJoinDesc; + private final VectorSMBJoinDesc vectorSMBJoinDesc; + + public SMBJoinOperatorExplainVectorization(SMBJoinDesc smbJoinDesc, VectorDesc vectorDesc) { + // Native vectorization NOT supported. + super(vectorDesc, false); + this.smbJoinDesc = smbJoinDesc; + vectorSMBJoinDesc = (VectorSMBJoinDesc) vectorDesc; + } + } + + // Handle dual nature. 
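// MapJoinDesc is also the superclass of SMBJoinDesc, so the two @Explain getters
// split the dual nature by runtime type: getMapJoinVectorization() above returns
// null for an SMBJoinDesc, while getSMBJoinVectorization() below returns null for
// a plain MapJoinDesc. A sketch of the resulting invariant, assuming the EXPLAIN
// walker calls every @Explain getter and skips null results:
//
//   MapJoinDesc desc = ...;  // possibly an SMBJoinDesc
//   // at most one of the two vectorization blocks is ever rendered:
//   assert desc.getMapJoinVectorization() == null
//       || desc.getSMBJoinVectorization() == null;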
+ @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { + if (vectorDesc == null || !(this instanceof SMBJoinDesc)) { + return null; + } + return new SMBJoinOperatorExplainVectorization((SMBJoinDesc) this, vectorDesc); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 5cc3663..081c511 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -26,6 +26,7 @@ import java.util.BitSet; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -47,8 +48,10 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -140,6 +143,12 @@ private VectorizedRowBatch vectorizedRowBatch; + private VectorizerReason notEnabledInputFileFormatReason; + + private Set vectorizationInputFileFormatClassNameSet; + private List vectorizationEnabledConditionsMet; + private List vectorizationEnabledConditionsNotMet; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -357,7 +366,7 @@ public void setAliasToWork( return nameToSplitSample; } - @Explain(displayName = "LLAP IO") + @Explain(displayName = "LLAP IO", vectorization = Vectorization.SUMMARY_PATH) public String getLlapIoDesc() { return llapIoDesc; } @@ -429,7 +438,8 @@ private void setAliases() { } } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -459,7 +469,8 @@ public void replaceRoots(Map, Operator> replacementMap) { } @Override - @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.OPERATOR_PATH) public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); @@ -716,4 +727,86 @@ public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { public VectorizedRowBatch getVectorizedRowBatch() { return vectorizedRowBatch; } + + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. 
+ * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + + public void setNotEnabledInputFileFormatReason(VectorizerReason notEnabledInputFileFormatReason) { + this.notEnabledInputFileFormatReason = notEnabledInputFileFormatReason; + } + + public VectorizerReason getNotEnabledInputFileFormatReason() { + return notEnabledInputFileFormatReason; + } + + public void setVectorizationInputFileFormatClassNameSet(Set vectorizationInputFileFormatClassNameSet) { + this.vectorizationInputFileFormatClassNameSet = vectorizationInputFileFormatClassNameSet; + } + + public Set getVectorizationInputFileFormatClassNameSet() { + return vectorizationInputFileFormatClassNameSet; + } + + public void setVectorizationEnabledConditionsMet(ArrayList vectorizationEnabledConditionsMet) { + this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true); + } + + public List getVectorizationEnabledConditionsMet() { + return vectorizationEnabledConditionsMet; + } + + public void setVectorizationEnabledConditionsNotMet(List vectorizationEnabledConditionsNotMet) { + this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false); + } + + public List getVectorizationEnabledConditionsNotMet() { + return vectorizationEnabledConditionsNotMet; + } + + public class MapExplainVectorization extends BaseExplainVectorization { + + private final MapWork mapWork; + + public MapExplainVectorization(MapWork mapWork) { + super(mapWork); + this.mapWork = mapWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFileFormats", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Set inputFileFormats() { + return mapWork.getVectorizationInputFileFormatClassNameSet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsMet() { + return mapWork.getVectorizationEnabledConditionsMet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsNotMet() { + return mapWork.getVectorizationEnabledConditionsNotMet(); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Map Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MapExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java index 82143a6..76b5138 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java @@ -32,13 +32,15 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * 
MapredLocalWork. * */ -@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index aa7f6ed..af9adc2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -24,14 +24,15 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredWork. * */ -@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -40,7 +41,8 @@ private boolean finalMapRed; - @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public MapWork getMapWork() { return mapWork; } @@ -49,7 +51,8 @@ public void setMapWork(MapWork mapWork) { this.mapWork = mapWork; } - @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public ReduceWork getReduceWork() { return reduceWork; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java new file mode 100644 index 0000000..bdf9859 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +public class OperatorExplainVectorization { + + protected final VectorDesc vectorDesc; + + protected final boolean isNative; + + public OperatorExplainVectorization(VectorDesc vectorDesc, boolean isNative) { + this.vectorDesc = vectorDesc; + this.isNative = isNative; + } + + public List vectorExpressionsToStringList(VectorExpression[] vectorExpressions) { + if (vectorExpressions == null) { + return null; + } + List vecExprList = new ArrayList(vectorExpressions.length); + for (VectorExpression vecExpr : vectorExpressions) { + vecExprList.add(vecExpr.toString()); + } + return vecExprList; + } + + public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + return Arrays.toString(outputColumns); + } + + public List columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] inputColumns = vectorColumnMapping.getInputColumns(); + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + ArrayList result = new ArrayList(size); + for (int i = 0; i < size; i++) { + result.add(inputColumns[i] + " -> " + outputColumns[i]); + } + return result; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "className", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getClassName() { + return vectorDesc.getVectorOpClass().getSimpleName(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "native", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getNative() { + return isNative; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index d7e404c..b8c2d42 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -19,11 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,9 +128,6 @@ private ReducerTraits(int trait) { private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class); - // Extra parameters only for vectorization. 
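/*
 * The createNativeConditions() methods added by this patch all hand a
 * VectorizationCondition[] to VectorizationCondition.getConditionsMet() and
 * getConditionsNotMet(). The helper itself is not part of this diff; below is a
 * minimal sketch consistent with those call sites (the real class may differ,
 * for example in how addBooleans() decorates the condition names):
 */
import java.util.ArrayList;
import java.util.List;

public class VectorizationCondition {

  private final boolean flag;
  private final String conditionName;

  public VectorizationCondition(boolean flag, String conditionName) {
    this.flag = flag;
    this.conditionName = conditionName;
  }

  // Collects the names of the conditions that evaluated to true.
  public static List<String> getConditionsMet(VectorizationCondition[] conditions) {
    List<String> met = new ArrayList<String>();
    for (VectorizationCondition condition : conditions) {
      if (condition.flag) {
        met.add(condition.conditionName);
      }
    }
    return met;
  }

  // Collects the names of the conditions that evaluated to false.
  public static List<String> getConditionsNotMet(VectorizationCondition[] conditions) {
    List<String> notMet = new ArrayList<String>();
    for (VectorizationCondition condition : conditions) {
      if (!condition.flag) {
        notMet.add(condition.conditionName);
      }
    }
    return notMet;
  }
}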
- private VectorReduceSinkDesc vectorDesc; - public ReduceSinkDesc() { } @@ -187,14 +191,6 @@ public Object clone() { return desc; } - public void setVectorDesc(VectorReduceSinkDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorReduceSinkDesc getVectorDesc() { - return vectorDesc; - } - public java.util.ArrayList getOutputKeyColumnNames() { return outputKeyColumnNames; } @@ -490,4 +486,105 @@ public void setHasOrderBy(boolean hasOrderBy) { this.hasOrderBy = hasOrderBy; } + // Use LinkedHashSet to give predictable display order. + private static Set vectorizableReduceSinkNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final ReduceSinkDesc reduceSinkDesc; + private final VectorReduceSinkDesc vectorReduceSinkDesc; + private final VectorReduceSinkInfo vectorReduceSinkInfo; + + private VectorizationCondition[] nativeConditions; + + public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, VectorDesc vectorDesc) { + // VectorReduceSinkOperator is not native vectorized. + super(vectorDesc, ((VectorReduceSinkDesc) vectorDesc).reduceSinkKeyType()!= ReduceSinkKeyType.NONE); + this.reduceSinkDesc = reduceSinkDesc; + vectorReduceSinkDesc = (VectorReduceSinkDesc) vectorDesc; + vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo(); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkKeyExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "valueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled(); + + String engine = vectorReduceSinkDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; + boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + !vectorReduceSinkDesc.getAcidChange(), + "Not ACID UPDATE or DELETE"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasBuckets(), + "No buckets"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasTopN(), + "No TopN"), + new VectorizationCondition( + vectorReduceSinkDesc.getUseUniformHash(), + "Uniform Hash"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasDistinctColumns(), + "No DISTINCT columns"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsKeyBinarySortable(), + "BinarySortableSerDe for keys"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsValueLazyBinary(), + "LazyBinarySerDe for values") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = 
"nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new ReduceSinkOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index 72fc4ca..f4ab2a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -19,17 +19,23 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -89,6 +95,9 @@ public ReduceWork(String name) { private ObjectInspector keyObjectInspector = null; private ObjectInspector valueObjectInspector = null; + private boolean reduceVectorizationEnabled; + private String vectorReduceEngine; + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -142,7 +151,8 @@ public void setTagToValueDesc(final List tagToValueDesc) { this.tagToValueDesc = tagToValueDesc; } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -160,7 +170,8 @@ public String getExecutionMode() { return null; } - @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.OPERATOR_PATH) public Operator getReducer() { return reducer; } @@ -252,4 +263,81 @@ public int getMaxReduceTasks() { public void setMaxReduceTasks(int maxReduceTasks) { this.maxReduceTasks = 
maxReduceTasks; } + + public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) { + this.reduceVectorizationEnabled = reduceVectorizationEnabled; + } + + public boolean getReduceVectorizationEnabled() { + return reduceVectorizationEnabled; + } + + public void setVectorReduceEngine(String vectorReduceEngine) { + this.vectorReduceEngine = vectorReduceEngine; + } + + public String getVectorReduceEngine() { + return vectorReduceEngine; + } + + // Use LinkedHashSet to give predictable display order. + private static Set reduceVectorizableEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceExplainVectorization extends BaseExplainVectorization { + + private final ReduceWork reduceWork; + + private VectorizationCondition[] reduceVectorizationConditions; + + public ReduceExplainVectorization(ReduceWork reduceWork) { + super(reduceWork); + this.reduceWork = reduceWork; + } + + private VectorizationCondition[] createReduceExplainVectorizationConditions() { + + boolean enabled = reduceWork.getReduceVectorizationEnabled(); + + String engine = reduceWork.getVectorReduceEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines; + + boolean engineInSupported = reduceVectorizableEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName) + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(reduceVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new ReduceExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index 67a8327..0601ce0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -19,8 +19,11 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -135,4 +138,36 @@ public boolean isSelStarNoCompute() { public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } + + + public class SelectOperatorExplainVectorization extends 
OperatorExplainVectorization { + + private final SelectDesc selectDesc; + private final VectorSelectDesc vectorSelectDesc; + + public SelectOperatorExplainVectorization(SelectDesc selectDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.selectDesc = selectDesc; + vectorSelectDesc = (VectorSelectDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "selectExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSelectExpressions() { + return vectorExpressionsToStringList(vectorSelectDesc.getSelectExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorSelectDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Select Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SelectOperatorExplainVectorization getSelectVectorization() { + if (vectorDesc == null) { + return null; + } + return new SelectOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java index 8833ae3..260bc07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + /** * Map Join operator Descriptor implementation. * @@ -43,4 +46,26 @@ public byte getTag() { public void setTag(byte tag) { this.tag = tag; } + + public class SparkHashTableSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final HashTableSinkDesc filterDesc; + private final VectorSparkHashTableSinkDesc vectorHashTableSinkDesc; + + public SparkHashTableSinkOperatorExplainVectorization(HashTableSinkDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorHashTableSinkDesc = (VectorSparkHashTableSinkDesc) vectorDesc; + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Spark Hash Table Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SparkHashTableSinkOperatorExplainVectorization getHashTableSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new SparkHashTableSinkOperatorExplainVectorization(this, vectorDesc); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index bb5dd79..066e32d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; - import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -34,6 +33,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import com.google.common.base.Preconditions; @@ -43,7 +43,7 @@ * roots and and ReduceWork at all other nodes. 
*/ @SuppressWarnings("serial") -@Explain(displayName = "Spark") +@Explain(displayName = "Spark", vectorization = Vectorization.SUMMARY_PATH) public class SparkWork extends AbstractOperatorDesc { private static int counter; private final String name; @@ -76,7 +76,7 @@ public String getName() { /** * @return a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices") + @Explain(displayName = "Vertices", vectorization = Vectorization.SUMMARY_PATH) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index ebe613e..78b2e8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; @@ -396,4 +397,29 @@ public boolean isNeedSkipHeaderFooters() { return opProps; } + public class TableScanOperatorExplainVectorization extends OperatorExplainVectorization { + + private final TableScanDesc tableScanDesc; + private final VectorTableScanDesc vectorTableScanDesc; + + public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.tableScanDesc = tableScanDesc; + vectorTableScanDesc = (VectorTableScanDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public TableScanOperatorExplainVectorization getTableScanVectorization() { + if (vectorDesc == null) { + return null; + } + return new TableScanOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index 7a70e6b..a037ea3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * TezWork. 
This class encapsulates all the work objects that can be executed @@ -49,7 +49,8 @@ * */ @SuppressWarnings("serial") -@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class TezWork extends AbstractOperatorDesc { public enum VertexType { @@ -107,7 +108,8 @@ public String getDagId() { /** * getWorkMap returns a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { @@ -306,7 +308,8 @@ public int compareTo(Dependency o) { } } - @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); for (Map.Entry> entry: invertedWorkGraph.entrySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java new file mode 100644 index 0000000..2e11321 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorAppMasterEventDesc. + * + * Extra parameters beyond AppMasterEventDesc just for the VectorAppMasterEventDescOperator. + * + * We don't extend AppMasterEventDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. 
+ */ +public class VectorAppMasterEventDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorAppMasterEventDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java index 3a2efdb..078408c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java @@ -22,4 +22,9 @@ public interface VectorDesc extends Serializable, Cloneable { public Object clone() throws CloneNotSupportedException; + + public void setVectorOp(Class vectorOpClass); + + public Class getVectorOpClass(); + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java new file mode 100644 index 0000000..325ac91 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorFileSinkDesc. + * + * Extra parameters beyond FileSinkDesc just for the VectorFileSinkOperator. + * + * We don't extend FileSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorFileSinkDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorFileSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java new file mode 100644 index 0000000..6feed84 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorFilterDesc. 
+ * + * Extra parameters beyond FilterDesc just for the VectorFilterOperator. + * + * We don't extend FilterDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorFilterDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + private VectorExpression predicateExpression; + + public VectorFilterDesc() { + } + + public void setPredicateExpression(VectorExpression predicateExpression) { + this.predicateExpression = predicateExpression; + } + + public VectorExpression getPredicateExpression() { + return predicateExpression; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index 08f8ebf..f8554e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; + /** * VectorGroupByDesc. * @@ -59,6 +62,10 @@ private boolean isVectorOutput; + private VectorExpression[] keyExpressions; + private VectorAggregateExpression[] aggregators; + private int[] projectedOutputColumns; + public VectorGroupByDesc() { this.processingMode = ProcessingMode.NONE; this.isVectorOutput = false; @@ -79,6 +86,30 @@ public void setVectorOutput(boolean isVectorOutput) { this.isVectorOutput = isVectorOutput; } + public void setKeyExpressions(VectorExpression[] keyExpressions) { + this.keyExpressions = keyExpressions; + } + + public VectorExpression[] getKeyExpressions() { + return keyExpressions; + } + + public void setAggregators(VectorAggregateExpression[] aggregators) { + this.aggregators = aggregators; + } + + public VectorAggregateExpression[] getAggregators() { + return aggregators; + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } + /** * Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java new file mode 100644 index 0000000..c9bc45a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorLimitDesc. + * + * Extra parameters beyond LimitDesc just for the VectorLimitOperator. 
+ * + * We don't extend LimitDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. + */ +public class VectorLimitDesc extends AbstractVectorDesc { + + private static long serialVersionUID = 1L; + + public VectorLimitDesc() { + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 8ea230f..1252140 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -18,9 +18,13 @@ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import com.google.common.base.Preconditions; + /** * VectorGroupByDesc. * @@ -79,23 +83,38 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } } + public static enum OperatorVariation { + NONE, + INNER_BIG_ONLY, + INNER, + LEFT_SEMI, + OUTER + } + private HashTableImplementationType hashTableImplementationType; private HashTableKind hashTableKind; private HashTableKeyType hashTableKeyType; + private OperatorVariation operatorVariation; private boolean minMaxEnabled; + private VectorMapJoinInfo vectorMapJoinInfo; + public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; + operatorVariation = OperatorVariation.NONE; minMaxEnabled = false; + vectorMapJoinInfo = null; } public VectorMapJoinDesc(VectorMapJoinDesc clone) { this.hashTableImplementationType = clone.hashTableImplementationType; this.hashTableKind = clone.hashTableKind; this.hashTableKeyType = clone.hashTableKeyType; + this.operatorVariation = clone.operatorVariation; this.minMaxEnabled = clone.minMaxEnabled; + this.vectorMapJoinInfo = clone.vectorMapJoinInfo; } public HashTableImplementationType hashTableImplementationType() { @@ -122,6 +141,14 @@ public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { this.hashTableKeyType = hashTableKeyType; } + public OperatorVariation operatorVariation() { + return operatorVariation; + } + + public void setOperatorVariation(OperatorVariation operatorVariation) { + this.operatorVariation = operatorVariation; + } + public boolean minMaxEnabled() { return minMaxEnabled; } @@ -129,4 +156,87 @@ public boolean minMaxEnabled() { public void setMinMaxEnabled(boolean minMaxEnabled) { this.minMaxEnabled = minMaxEnabled; } + + public void setVectorMapJoinInfo(VectorMapJoinInfo vectorMapJoinInfo) { + Preconditions.checkState(vectorMapJoinInfo != null); + this.vectorMapJoinInfo = vectorMapJoinInfo; + } + + public VectorMapJoinInfo getVectorMapJoinInfo() { + return vectorMapJoinInfo; + } + + private boolean isVectorizationMapJoinNativeEnabled; + private String engine; + private boolean oneMapJoinCondition; + private boolean hasNullSafes; + private boolean isFastHashTableEnabled; + private boolean isHybridHashJoin; + private boolean supportsKeyTypes; + private List notSupportedKeyTypes; + private boolean isEmptyKey; + private boolean smallTableExprVectorizes; + + public void setIsVectorizationMapJoinNativeEnabled(boolean isVectorizationMapJoinNativeEnabled) { + this.isVectorizationMapJoinNativeEnabled = isVectorizationMapJoinNativeEnabled; + } + public boolean getIsVectorizationMapJoinNativeEnabled() { + return isVectorizationMapJoinNativeEnabled; + } + 
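// A sketch of how the Vectorizer is expected to populate the native-vectorization
// condition fields above and below before the EXPLAIN getters in MapJoinDesc read
// them. The HiveConf accessors and ConfVars names appear elsewhere in this patch;
// the surrounding context (hiveConf, mapJoinDesc) and the exact population logic
// are assumptions:
//
//   VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
//   vectorDesc.setIsVectorizationMapJoinNativeEnabled(HiveConf.getBoolVar(
//       hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED));
//   vectorDesc.setEngine(HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
//   vectorDesc.setOneMapJoinCondition(mapJoinDesc.getConds().length == 1);
//   ...                                     // remaining condition setters
//   mapJoinDesc.setVectorDesc(vectorDesc);  // assumed shared base-class setter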
public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setOneMapJoinCondition(boolean oneMapJoinCondition) { + this.oneMapJoinCondition = oneMapJoinCondition; + } + public boolean getOneMapJoinCondition() { + return oneMapJoinCondition; + } + public void setHasNullSafes(boolean hasNullSafes) { + this.hasNullSafes = hasNullSafes; + } + public boolean getHasNullSafes() { + return hasNullSafes; + } + public void setSupportsKeyTypes(boolean supportsKeyTypes) { + this.supportsKeyTypes = supportsKeyTypes; + } + public boolean getSupportsKeyTypes() { + return supportsKeyTypes; + } + public void setNotSupportedKeyTypes(List notSupportedKeyTypes) { + this.notSupportedKeyTypes = notSupportedKeyTypes; + } + public List getNotSupportedKeyTypes() { + return notSupportedKeyTypes; + } + public void setIsEmptyKey(boolean isEmptyKey) { + this.isEmptyKey = isEmptyKey; + } + public boolean getIsEmptyKey() { + return isEmptyKey; + } + public void setSmallTableExprVectorizes(boolean smallTableExprVectorizes) { + this.smallTableExprVectorizes = smallTableExprVectorizes; + } + public boolean getSmallTableExprVectorizes() { + return smallTableExprVectorizes; + } + + public void setIsFastHashTableEnabled(boolean isFastHashTableEnabled) { + this.isFastHashTableEnabled = isFastHashTableEnabled; + } + public boolean getIsFastHashTableEnabled() { + return isFastHashTableEnabled; + } + public void setIsHybridHashJoin(boolean isHybridHashJoin) { + this.isHybridHashJoin = isHybridHashJoin; + } + public boolean getIsHybridHashJoin() { + return isHybridHashJoin; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java new file mode 100644 index 0000000..2cf2e72 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorMapJoinInfo. + * + * A convenience data structure that has information needed to vectorize map join. + * + * It is created by the Vectorizer when it is determining whether it can specialize so the + * information doesn't have to be recreated again and again by the VectorMapJoinOperator's + * constructors and later during execution. 
+ */ +public class VectorMapJoinInfo { + + private static long serialVersionUID = 1L; + + private int[] bigTableKeyColumnMap; + private String[] bigTableKeyColumnNames; + private TypeInfo[] bigTableKeyTypeInfos; + private VectorExpression[] bigTableKeyExpressions; + + private int[] bigTableValueColumnMap; + private String[] bigTableValueColumnNames; + private TypeInfo[] bigTableValueTypeInfos; + private VectorExpression[] bigTableValueExpressions; + + private VectorColumnOutputMapping bigTableRetainedMapping; + private VectorColumnOutputMapping bigTableOuterKeyMapping; + private VectorColumnSourceMapping smallTableMapping; + + private VectorColumnSourceMapping projectionMapping; + + public VectorMapJoinInfo() { + bigTableKeyColumnMap = null; + bigTableKeyColumnNames = null; + bigTableKeyTypeInfos = null; + bigTableKeyExpressions = null; + + bigTableValueColumnMap = null; + bigTableValueColumnNames = null; + bigTableValueTypeInfos = null; + bigTableValueExpressions = null; + + bigTableRetainedMapping = null; + bigTableOuterKeyMapping = null; + smallTableMapping = null; + + projectionMapping = null; + } + + public int[] getBigTableKeyColumnMap() { + return bigTableKeyColumnMap; + } + + public void setBigTableKeyColumnMap(int[] bigTableKeyColumnMap) { + this.bigTableKeyColumnMap = bigTableKeyColumnMap; + } + + public String[] getBigTableKeyColumnNames() { + return bigTableKeyColumnNames; + } + + public void setBigTableKeyColumnNames(String[] bigTableKeyColumnNames) { + this.bigTableKeyColumnNames = bigTableKeyColumnNames; + } + + public TypeInfo[] getBigTableKeyTypeInfos() { + return bigTableKeyTypeInfos; + } + + public void setBigTableKeyTypeInfos(TypeInfo[] bigTableKeyTypeInfos) { + this.bigTableKeyTypeInfos = bigTableKeyTypeInfos; + } + + public VectorExpression[] getBigTableKeyExpressions() { + return bigTableKeyExpressions; + } + + public void setBigTableKeyExpressions(VectorExpression[] bigTableKeyExpressions) { + this.bigTableKeyExpressions = bigTableKeyExpressions; + } + + + public int[] getBigTableValueColumnMap() { + return bigTableValueColumnMap; + } + + public void setBigTableValueColumnMap(int[] bigTableValueColumnMap) { + this.bigTableValueColumnMap = bigTableValueColumnMap; + } + + public String[] getBigTableValueColumnNames() { + return bigTableValueColumnNames; + } + + public void setBigTableValueColumnNames(String[] bigTableValueColumnNames) { + this.bigTableValueColumnNames = bigTableValueColumnNames; + } + + public TypeInfo[] getBigTableValueTypeInfos() { + return bigTableValueTypeInfos; + } + + public void setBigTableValueTypeInfos(TypeInfo[] bigTableValueTypeInfos) { + this.bigTableValueTypeInfos = bigTableValueTypeInfos; + } + + public VectorExpression[] getBigTableValueExpressions() { + return bigTableValueExpressions; + } + + public void setBigTableValueExpressions(VectorExpression[] bigTableValueExpressions) { + this.bigTableValueExpressions = bigTableValueExpressions; + } + + public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { + this.bigTableRetainedMapping = bigTableRetainedMapping; + } + + public VectorColumnOutputMapping getBigTableRetainedMapping() { + return bigTableRetainedMapping; + } + + public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { + this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + } + + public VectorColumnOutputMapping getBigTableOuterKeyMapping() { + return bigTableOuterKeyMapping; + } + + public void setSmallTableMapping(VectorColumnSourceMapping 
smallTableMapping) { + this.smallTableMapping = smallTableMapping; + } + + public VectorColumnSourceMapping getSmallTableMapping() { + return smallTableMapping; + } + + public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { + this.projectionMapping = projectionMapping; + } + + public VectorColumnSourceMapping getProjectionMapping() { + return projectionMapping; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java index c56bff6..288a440 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java @@ -61,4 +61,72 @@ public void setVectorReduceSinkInfo(VectorReduceSinkInfo vectorReduceSinkInfo) { public VectorReduceSinkInfo getVectorReduceSinkInfo() { return vectorReduceSinkInfo; } + + private boolean isVectorizationReduceSinkNativeEnabled; + private String engine; + private boolean acidChange; + private boolean hasBuckets; + private boolean hasTopN; + private boolean useUniformHash; + private boolean hasDistinctColumns; + private boolean isKeyBinarySortable; + private boolean isValueLazyBinary; + + /* + * The following conditions are for native Vector ReduceSink. + */ + public void setIsVectorizationReduceSinkNativeEnabled(boolean isVectorizationReduceSinkNativeEnabled) { + this.isVectorizationReduceSinkNativeEnabled = isVectorizationReduceSinkNativeEnabled; + } + public boolean getIsVectorizationReduceSinkNativeEnabled() { + return isVectorizationReduceSinkNativeEnabled; + } + public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setAcidChange(boolean acidChange) { + this.acidChange = acidChange; + } + public boolean getAcidChange() { + return acidChange; + } + public void setHasBuckets(boolean hasBuckets) { + this.hasBuckets = hasBuckets; + } + public boolean getHasBuckets() { + return hasBuckets; + } + public void setHasTopN(boolean hasTopN) { + this.hasTopN = hasTopN; + } + public boolean getHasTopN() { + return hasTopN; + } + public void setUseUniformHash(boolean useUniformHash) { + this.useUniformHash = useUniformHash; + } + public boolean getUseUniformHash() { + return useUniformHash; + } + public void setHasDistinctColumns(boolean hasDistinctColumns) { + this.hasDistinctColumns = hasDistinctColumns; + } + public boolean getHasDistinctColumns() { + return hasDistinctColumns; + } + public void setIsKeyBinarySortable(boolean isKeyBinarySortable) { + this.isKeyBinarySortable = isKeyBinarySortable; + } + public boolean getIsKeyBinarySortable() { + return isKeyBinarySortable; + } + public void setIsValueLazyBinary(boolean isValueLazyBinary) { + this.isValueLazyBinary = isValueLazyBinary; + } + public boolean getIsValueLazyBinary() { + return isValueLazyBinary; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java new file mode 100644 index 0000000..ab578cd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSMBJoinDesc. + * + * Extra parameters beyond SMBMapJoinDesc just for the VectorSMBMapJoinOperator. + * + * We don't extend SMBMapJoinDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSMBJoinDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSMBJoinDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java new file mode 100644 index 0000000..c2c9450 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorSelectDesc. + * + * Extra parameters beyond SelectDesc just for the VectorSelectOperator. + * + * We don't extend SelectDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain.
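+ * + * A sketch of how it is populated (this mirrors the hand-built setup in + * TestVectorSelectOperator later in this patch; the projection {3, 2} there is just an example): + * + * <pre> + *   VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + *   vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + *   vectorSelectDesc.setProjectedOutputColumns(new int[] {3, 2}); + *   selectDesc.setVectorDesc(vectorSelectDesc); + * </pre>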
+ */ +public class VectorSelectDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private VectorExpression[] selectExpressions; + private int[] projectedOutputColumns; + + public VectorSelectDesc() { + } + + public void setSelectExpressions(VectorExpression[] selectExpressions) { + this.selectExpressions = selectExpressions; + } + + public VectorExpression[] getSelectExpressions() { + return selectExpressions; + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java new file mode 100644 index 0000000..7fb59db --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSparkHashTableSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSparkHashTableSinkDesc. + * + * Extra parameters beyond SparkHashTableSinkDesc just for the VectorSparkHashTableSinkOperator. + * + * We don't extend SparkHashTableSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSparkHashTableSinkDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSparkHashTableSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java new file mode 100644 index 0000000..6e5ebe4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTableScanDesc.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorTableScanDesc.
+ * + * Extra parameters beyond TableScanDesc just for the VectorTableScanOperator. + * + * We don't extend TableScanDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorTableScanDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private int[] projectedOutputColumns; + + public VectorTableScanDesc() { + } + + public void setProjectedOutputColumns(int[] projectedOutputColumns) { + this.projectedOutputColumns = projectedOutputColumns; + } + + public int[] getProjectedOutputColumns() { + return projectedOutputColumns; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java new file mode 100644 index 0000000..32b62e8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class VectorizationCondition { + + private final boolean flag; + private final String conditionName; + + public VectorizationCondition(boolean flag, String conditionName) { + this.flag = flag; + this.conditionName = conditionName; + } + + public boolean getFlag() { + return flag; + } + + public String getConditionName() { + return conditionName; + } + + public static List<String> getConditionsMet(VectorizationCondition[] conditions) { + List<String> metList = new ArrayList<String>(); + for (VectorizationCondition condition : conditions) { + if (condition.getFlag()) { + metList.add(condition.getConditionName() + " IS true"); + } + } + return metList; + } + + public static List<String> getConditionsNotMet(VectorizationCondition[] conditions) { + List<String> notMetList = new ArrayList<String>(); + for (VectorizationCondition condition : conditions) { + if (!condition.getFlag()) { + notMetList.add(condition.getConditionName() + " IS false"); + } + } + return notMetList; + } + + public static List<String> addBooleans(List<String> conditions, boolean flag) { + ArrayList<String> result = new ArrayList<String>(conditions.size()); + for (String condition : conditions) { + result.add(condition + " IS " + flag); + } + return result; + }
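+ + // Illustrative use (an assumption about how the Vectorizer reports these + // conditions in EXPLAIN VECTORIZATION output; the flags and condition names + // below are hypothetical): + // + //   VectorizationCondition[] conditions = new VectorizationCondition[] { + //       new VectorizationCondition(isNativeEnabled, "native execution enabled"), + //       new VectorizationCondition(isTezOrSpark, "engine in [tez, spark]") + //   }; + //   List<String> met = getConditionsMet(conditions); + //   List<String> notMet = getConditionsNotMet(conditions); + + // Helper method.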
+ public static List<String> getConditionsSupported(boolean isSupported) { + return Arrays.asList("Supported IS " + isSupported); + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index d3bb84d..22b845d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -25,13 +25,19 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; import org.junit.Test; /** @@ -89,10 +95,15 @@ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); List<String> columns = new ArrayList<String>(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext("name", columns); FilterDesc fdesc = new FilterDesc(); fdesc.setPredicate(col1Expr); - return new VectorFilterOperator(new CompilationOpContext(), vc, fdesc); + + Operator<? extends OperatorDesc> filterOp = + OperatorFactory.get(new CompilationOpContext(), fdesc); + + VectorizationContext vc = new VectorizationContext("name", columns); + + return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(filterOp, vc); } @Test diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index f5b5d9d..086c0b7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -39,16 +39,20 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromRepeats; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import
org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -128,9 +132,11 @@ private static GroupByDesc buildGroupByDescType( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.GLOBAL); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.GLOBAL); return desc; } @@ -146,6 +152,8 @@ private static GroupByDesc buildGroupByDescCountStar( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); @@ -162,7 +170,7 @@ private static GroupByDesc buildKeyGroupByDesc( TypeInfo keyTypeInfo) { GroupByDesc desc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, key, keyTypeInfo); ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>(); @@ -196,7 +204,11 @@ public void testMemoryPressureFlush() throws HiveException { desc.setMemoryThreshold(treshold); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1735,13 +1747,19 @@ private void testMultiKey( } GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); desc.setKeys(keysDesc); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1846,9 +1864,11 @@ private void testKeyTypeAggregate( outputColumnNames.add("_col1"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0])); @@ -1857,7 +1877,11 @@ desc.setKeys(keysDesc); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx,
desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2252,10 +2276,14 @@ public void testAggregateCountStarIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescCountStar (ctx); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2283,10 +2311,14 @@ public void testAggregateCountReduceIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, "count", GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo); - VectorGroupByDesc vectorDesc = desc.getVectorDesc(); + VectorGroupByDesc vectorDesc = (VectorGroupByDesc) desc.getVectorDesc(); vectorDesc.setProcessingMode(ProcessingMode.GLOBAL); // Use GLOBAL when no key for Reduce. CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2318,7 +2350,11 @@ public void testAggregateStringIterable ( TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2350,7 +2386,11 @@ public void testAggregateDecimalIterable ( buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2383,7 +2423,11 @@ public void testAggregateDoubleIterable ( TypeInfoFactory.doubleTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
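// FakeCaptureOutputOperator collects the rows the vectorized group-by emits so the test can assert on them after initialize/process.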
vgo.initialize(hconf, null); @@ -2414,7 +2458,11 @@ public void testAggregateLongIterable ( GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(null, null); @@ -2449,7 +2497,11 @@ public void testAggregateLongKeyIterable ( TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2515,7 +2567,11 @@ public void testAggregateStringKeyIterable ( dataTypeInfo, "Key", TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 779177a..614b1d1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -51,6 +53,7 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, VectorizationContext ctxt, OperatorDesc conf) throws HiveException { super(ctx, ctxt, conf); + initializeOp(null); } @@ -115,6 +118,19 @@ public void testSelectOperator() throws HiveException { outputColNames.add("_col1"); selDesc.setOutputColumnNames(outputColNames); + // CONSIDER unwinding ValidatorVectorSelectOperator as a subclass of VectorSelectOperator.
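+ // Mimic what the Vectorizer would do here: compile each ExprNodeDesc in the + // SELECT list into a VectorExpression with the VectorizationContext, then + // record the compiled expressions and the projected output columns on the + // VectorSelectDesc before constructing the operator.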
+ VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selDesc.setVectorDesc(vectorSelectDesc); + List<ExprNodeDesc> selectColList = selDesc.getColList(); + VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()]; + for (int i = 0; i < selectColList.size(); i++) { + ExprNodeDesc expr = selectColList.get(i); + VectorExpression ve = vc.getVectorExpression(expr); + vectorSelectExprs[i] = ve; + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + vectorSelectDesc.setProjectedOutputColumns(new int[] {3, 2}); + ValidatorVectorSelectOperator vso = new ValidatorVectorSelectOperator( new CompilationOpContext(), vc, selDesc); diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 3295372..ccd1059 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -96,6 +96,8 @@ public void testAggregateOnUDF() throws HiveException { outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); ArrayList<AggregationDesc> aggDescList = new ArrayList<AggregationDesc>(); aggDescList.add(aggDesc); @@ -106,13 +108,14 @@ grpByKeys.add(colExprB); desc.setKeys(grpByKeys); - GroupByOperator gbyOp = new GroupByOperator(new CompilationOpContext()); - gbyOp.setConf(desc); + Operator<? extends OperatorDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), desc); + desc.setMode(GroupByDesc.Mode.HASH); Vectorizer v = new Vectorizer(); + v.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); - VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false); + VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false, null); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; Assert.assertEquals(FuncAbsLongToLong.class, udaf.getInputExpression().getClass()); @@ -147,8 +150,9 @@ public void testValidateNestedExpressions() { andExprDesc.setChildren(children3); Vectorizer v = new Vectorizer(); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER)); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION)); + v.testSetCurrentBaseWork(new MapWork()); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION)); } /** @@ -196,6 +200,7 @@ public void testValidateMapJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); + vectorizer.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } @@ -212,6 +217,7 @@ public void testValidateSMBJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); + vectorizer.testSetCurrentBaseWork(new MapWork()); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } } diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q index 7e66cbc..48903d2 100644 ---
ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -38,7 +38,7 @@ alter table part_add_int_permute_select add columns(c int); insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -61,7 +61,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string); insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444'); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -93,7 +93,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double; select insert_num,part,c1,c2,c3,b from part_change_string_group_double; @@ -116,7 +116,7 @@ alter table part_change_date_group_string_group_date_timestamp replace columns(i insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; @@ -164,7 +164,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; @@ -207,7 +207,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; @@ -249,7 +249,7 @@ insert into table part_change_string_group_string_group_string partition(part=1) 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; @@ -299,7 +299,7 @@ insert into table 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa 1234.5678, 9876.543, 789.321, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; @@ -330,7 +330,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q index ac747e6..45afd9d 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_complex.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -51,7 +51,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct1_c.txt' insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt; -explain +explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1; select insert_num,part,s1,b from part_change_various_various_struct1; @@ -111,7 +111,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt' insert into table part_add_various_various_struct2 partition(part=1) select * from complex_struct2_d_txt; -explain +explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2; select insert_num,part,b,s2 from part_add_various_various_struct2; @@ -155,7 +155,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct4_c.txt' insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt; -explain +explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4; select insert_num,part,b,s3 from part_add_to_various_various_struct4; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q index d3898a8..b266a67 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -72,7 +72,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, 'new' FROM schema_evolution_data; -explain +explain vectorization detail select
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; @@ -114,7 +114,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; @@ -138,7 +138,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp; @@ -159,7 +159,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date; @@ -198,7 +198,7 @@ load data local inpath '../../data/files/schema_evolution/same_type1_c.txt' over insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q index ffaa07b..866942e 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_table.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=true; @@ -36,7 +36,7 @@ alter table table_add_int_permute_select add columns(c int); insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000); -explain +explain vectorization detail select insert_num,a,b from table_add_int_permute_select; -- SELECT 
permutation columns to make sure NULL defaulting works right @@ -59,7 +59,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string); insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler'); -explain +explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -91,7 +91,7 @@ alter table table_change_string_group_double replace columns (insert_num int, c1 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double; select insert_num,c1,c2,c3,b from table_change_string_group_double; @@ -158,7 +158,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group; @@ -201,7 +201,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q index 6582035..77c863a 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int); insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string); insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444'); -explain +explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select; -- SELECT permutation columns to make sure NULL defaulting works right @@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double; select insert_num,part,c1,c2,c3,b from part_change_string_group_double; @@ -117,7 +117,7 @@ alter table 
part_change_date_group_string_group_date_timestamp replace columns(i insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp; @@ -165,7 +165,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group; @@ -208,7 +208,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group; @@ -250,7 +250,7 @@ insert into table part_change_string_group_string_group_string partition(part=1) 'filler', 'filler', 'filler', 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string; @@ -300,7 +300,7 @@ insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa 1234.5678, 9876.543, 789.321, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint; @@ -331,7 +331,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new'); -explain +explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q index e38a01e..7eb72e0 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -53,7 +53,7 @@ load data local inpath 
'../../data/files/schema_evolution/complex_struct1_c.txt' insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt; -explain +explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1; select insert_num,part,s1,b from part_change_various_various_struct1; @@ -113,7 +113,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt' insert into table part_add_various_various_struct2 partition(part=1) select * from complex_struct2_d_txt; -explain +explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2; select insert_num,part,b,s2 from part_add_various_various_struct2; @@ -157,7 +157,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct4_c.txt' insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt; -explain +explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4; select insert_num,part,b,s3 from part_add_to_various_various_struct4; diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q index c9d90c3..d5c01cd 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_primitive.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; @@ -74,7 +74,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, 'new' FROM schema_evolution_data; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint; @@ -116,7 +116,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111; -explain +explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double; @@ -140,7 +140,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE 
insert_num=111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp;

@@ -161,7 +161,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1
 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date;

 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date;

@@ -200,7 +200,7 @@ load data local inpath '../../data/files/schema_evolution/same_type1_c.txt' over
 insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params;

 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params;
diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q
index 7785f87..bbf03af 100644
--- ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q
+++ ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
 SET hive.vectorized.use.vectorized.input.format=false;
@@ -38,7 +38,7 @@ alter table table_add_int_permute_select add columns(c int);
 insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000);

-explain
+explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -61,7 +61,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string);
 insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler');

-explain
+explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -93,7 +93,7 @@ alter table table_change_string_group_double replace columns (insert_num int, c1
 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double;

 select insert_num,c1,c2,c3,b from table_change_string_group_double;
@@ -160,7 +160,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;

 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;
@@ -203,7 +203,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;

 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;
diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
index 8ed041b..11df12e 100644
--- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
+++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int);
 insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333);

-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c int, d string);
 insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444');

-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns (insert_num int, c1
 insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;

 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
@@ -117,7 +117,7 @@ alter table part_change_date_group_string_group_date_timestamp replace columns(i
 insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp;
@@ -165,7 +165,7 @@ insert into table part_change_numeric_group_string_group_multi_ints_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group;
@@ -208,7 +208,7 @@ insert into table part_change_numeric_group_string_group_floating_string_group p
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group;
@@ -250,7 +250,7 @@ insert into table part_change_string_group_string_group_string partition(part=1)
 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string;
@@ -300,7 +300,7 @@ insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa
 1234.5678, 9876.543, 789.321,
 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
@@ -331,7 +331,7 @@ alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace c
 insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float;

 select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float;
diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q
index b9d035e..dfd55d9 100644
--- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q
+++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -54,7 +54,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct1_c.txt'
 insert into table part_change_various_various_struct1 partition(part=1) select * from complex_struct1_c_txt;

-explain
+explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1;

 select insert_num,part,s1,b from part_change_various_various_struct1;
@@ -114,7 +114,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct2_d.txt'
 insert into table part_add_various_various_struct2 partition(part=1) select * from complex_struct2_d_txt;

-explain
+explain vectorization detail
 select insert_num,part,b,s2 from part_add_various_various_struct2;

 select insert_num,part,b,s2 from part_add_various_various_struct2;
@@ -158,7 +158,7 @@ load data local inpath '../../data/files/schema_evolution/complex_struct4_c.txt'
 insert into table part_add_to_various_various_struct4 partition(part=1) select * from complex_struct4_c_txt;

-explain
+explain vectorization detail
 select insert_num,part,b,s3 from part_add_to_various_various_struct4;

 select insert_num,part,b,s3 from part_add_to_various_various_struct4;
diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q
index f5b0485..d71c6b8 100644
--- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q
+++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -74,7 +74,7 @@ insert into table part_change_various_various_boolean_to_bigint partition(part=1
 bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1, bigint1,
 'new' FROM schema_evolution_data;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint;
@@ -116,7 +116,7 @@ insert into table part_change_various_various_decimal_to_double partition(part=1
 double1, double1, double1, double1, double1, double1, double1, double1, double1, double1, double1,
 'new' FROM schema_evolution_data_2 WHERE insert_num=111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double;
@@ -140,7 +140,7 @@ alter table part_change_various_various_timestamp replace columns (insert_num in
 insert into table part_change_various_various_timestamp partition(part=1) SELECT insert_num, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp;

 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp;
@@ -161,7 +161,7 @@ alter table part_change_various_various_date replace columns (insert_num int, c1
 insert into table part_change_various_various_date partition(part=1) SELECT insert_num, date1, date1, date1, date1, 'new' FROM schema_evolution_data_2 WHERE insert_num=111;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date;

 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date;
@@ -200,7 +200,7 @@ load data local inpath '../../data/files/schema_evolution/same_type1_c.txt' over
 insert into table part_change_same_type_different_params partition(part=2) select * from same_type1_c_txt;

-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params;

 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params;
diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q
index c8d5458..d4209a5 100644
--- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q
+++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
 SET hive.vectorized.use.vectorized.input.format=false;
@@ -38,7 +38,7 @@ alter table table_add_int_permute_select add columns(c int);
 insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000);

-explain
+explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -61,7 +61,7 @@ alter table table_add_int_string_permute_select add columns(c int, d string);
 insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler');

-explain
+explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select;

 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -93,7 +93,7 @@ alter table table_change_string_group_double replace columns (insert_num int, c1
 insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double;

 select insert_num,c1,c2,c3,b from table_change_string_group_double;
@@ -160,7 +160,7 @@ insert into table table_change_numeric_group_string_group_multi_ints_string_grou
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;

 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;
@@ -203,7 +203,7 @@ insert into table table_change_numeric_group_string_group_floating_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
 'new');

-explain
+explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;

 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;
diff --git ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
index cef4e4c..449bea2 100644
--- ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
+++ ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
@@ -32,7 +32,7 @@ insert into table count_case_groupby values ('key1', true),('key2', false),('key

 set hive.vectorized.adaptor.usage.mode=none;

-explain
+explain vectorization expression
 select
 c2 regexp 'val',
 c4 regexp 'val',
@@ -45,7 +45,7 @@ select
 (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1;

-explain
+explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
@@ -58,7 +58,7 @@ select
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1;

-explain
+explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
@@ -74,7 +74,7 @@ from varchar_udf_1 limit 1;

 set hive.vectorized.adaptor.usage.mode=chosen;

-explain
+explain vectorization expression
 select
 c2 regexp 'val',
 c4 regexp 'val',
@@ -87,7 +87,7 @@ select
 (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1;

-explain
+explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
@@ -100,7 +100,7 @@ select
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1;

-explain
+explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
@@ -116,11 +116,11 @@ from varchar_udf_1 limit 1;

 set hive.vectorized.adaptor.usage.mode=none;

-EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF;

 SELECT POWER(key, 2) FROM DECIMAL_UDF;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 exp(key), ln(key),
 log(key), log(key, key), log(key, value), log(value, key),
@@ -135,11 +135,11 @@ FROM DECIMAL_UDF WHERE key = 10;

 set hive.vectorized.adaptor.usage.mode=chosen;

-EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF;

 SELECT POWER(key, 2) FROM DECIMAL_UDF;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 exp(key), ln(key),
 log(key), log(key, key), log(key, value), log(value, key),
@@ -155,14 +155,14 @@ FROM DECIMAL_UDF WHERE key = 10;

 set hive.vectorized.adaptor.usage.mode=none;

-explain
+explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;

 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;

 set hive.vectorized.adaptor.usage.mode=chosen;

-explain
+explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;

 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
diff --git ql/src/test/queries/clientpositive/vector_aggregate_9.q ql/src/test/queries/clientpositive/vector_aggregate_9.q
index ce6f0ff..04fdeec 100644
--- ql/src/test/queries/clientpositive/vector_aggregate_9.q
+++ ql/src/test/queries/clientpositive/vector_aggregate_9.q
@@ -1,5 +1,6 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 create table vectortab2k(
 t tinyint,
@@ -38,7 +39,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc;

 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q
index 8a63635..64440e3 100644
--- ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q
+++ ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=true;
+set hive.fetch.task.conversion=none;
+
 create table testvec(id int, dt int, greg_dt string) stored as orc;
 insert into table testvec
 values
@@ -12,5 +14,5 @@ values
 (7,20150404, '2015-04-04');
 set hive.vectorized.execution.enabled=true;
 set hive.map.aggr=true;
-explain select max(dt), max(greg_dt) from testvec where id=5;
+explain vectorization select max(dt), max(greg_dt) from testvec where id=5;
 select max(dt), max(greg_dt) from testvec where id=5;
diff --git ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
index 2077f8e..3c53853 100644
--- ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
+++ ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
-set hive.explain.user=true;
+set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-;
+set hive.fetch.task.conversion=none;

 set hive.exec.reducers.max = 1;
@@ -23,7 +23,7 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 set hive.auto.convert.sortmerge.join=true;

 -- The join is being performed as part of sub-query. It should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1;
@@ -33,7 +33,7 @@ select count(*) from (
 ) subq1;

 -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from
 (
   select key, count(*) from
@@ -54,7 +54,7 @@ select count(*) from

 -- A join is being performed across different sub-queries, where a join is being performed in each of them.
 -- Each sub-query should be converted to a sort-merge join.
-explain
+explain vectorization expression
 select src1.key, src1.cnt1, src2.cnt1 from
 (
   select key, count(*) as cnt1 from
@@ -89,7 +89,7 @@ on src1.key = src2.key;

 -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
 -- be converted to a sort-merge join.
-explain
+explain vectorization expression
 select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
     join
@@ -104,7 +104,7 @@ select count(*) from

 -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
 -- be converted to a sort-merge join, although there is more than one level of sub-query
-explain
+explain vectorization expression
 select count(*) from
 (
   select * from
@@ -129,7 +129,7 @@ select count(*) from

 -- Both the tables are nested sub-queries i.e more then 1 level of sub-query.
 -- The join should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from
 (
   select * from
@@ -169,7 +169,7 @@ select count(*) from
 -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
 -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
 -- item, but that is not part of the join key.
-explain
+explain vectorization expression
 select count(*) from
   (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
     join
@@ -184,7 +184,7 @@ select count(*) from

 -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side
 -- join should be performed
-explain
+explain vectorization expression
 select count(*) from
   (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
     join
@@ -199,7 +199,7 @@ select count(*) from

 -- One of the tables is a sub-query and the other is not.
 -- It should be converted to a sort-merge join.
-explain
+explain vectorization expression
 select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
     join tbl2 a on subq1.key = a.key;
@@ -210,7 +210,7 @@ select count(*) from

 -- There are more than 2 inputs to the join, all of them being sub-queries.
 -- It should be converted to to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
     join
@@ -231,7 +231,7 @@ select count(*) from

 -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
 -- The join should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select subq2.key as key, subq2.value as value1, b.value as value2 from
   (
@@ -261,7 +261,7 @@ CREATE TABLE dest2(key int, val1 string, val2 string);

 -- The join is followed by a multi-table insert. It should be converted to
 -- a sort-merge join
-explain
+explain vectorization expression
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
@@ -282,7 +282,7 @@ CREATE TABLE dest2(key int, cnt int);

 -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
 -- It should be converted to a sort-merge join
-explain
+explain vectorization expression
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q
index 41f9243..920b692 100644
--- ql/src/test/queries/clientpositive/vector_between_columns.q
+++ ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -24,13 +24,13 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt;

 create table TINT stored as orc AS SELECT * FROM TINT_txt;

-explain
+explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint;

 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint;

-explain
+explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;

 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;
diff --git ql/src/test/queries/clientpositive/vector_between_in.q ql/src/test/queries/clientpositive/vector_between_in.q
index 487bf96..b9f4b13 100644
--- ql/src/test/queries/clientpositive/vector_between_in.q
+++ ql/src/test/queries/clientpositive/vector_between_in.q
@@ -1,24 +1,25 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate;

-EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate;

-EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE));
+EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE));

-EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1;

-EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568);
+EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568);

-EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate;

-EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate;

-EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1;

-EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351;
+EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351;

 SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate;
@@ -40,13 +41,13 @@ SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 439

 -- projections

-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0;

-EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0;

-EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0;

-EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0;

 SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0;
diff --git ql/src/test/queries/clientpositive/vector_binary_join_groupby.q ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
index 1d99e34..d19dbc3 100644
--- ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
+++ ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
@@ -4,6 +4,7 @@ SET hive.auto.convert.join=true;
 SET hive.auto.convert.join.noconditionaltask=true;
 SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 DROP TABLE over1k;
 DROP TABLE hundredorc;
@@ -40,14 +41,14 @@ STORED AS ORC;

 INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;

 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin;
@@ -58,6 +59,6 @@ GROUP BY bin;

 -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_bround.q ql/src/test/queries/clientpositive/vector_bround.q
index deea00b..ffa3ad3 100644
--- ql/src/test/queries/clientpositive/vector_bround.q
+++ ql/src/test/queries/clientpositive/vector_bround.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=true;
+SET hive.fetch.task.conversion=none;
+
 create table test_vector_bround(v0 double, v1 double) stored as orc;
 insert into table test_vector_bround
 values
@@ -12,5 +14,5 @@ values
 (2.51, 1.251),
 (3.51, 1.351);
 set hive.vectorized.execution.enabled=true;
-explain select bround(v0), bround(v1, 1) from test_vector_bround;
+explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround;
 select bround(v0), bround(v1, 1) from test_vector_bround;
diff --git ql/src/test/queries/clientpositive/vector_bucket.q ql/src/test/queries/clientpositive/vector_bucket.q
index 39436c9..b67592e 100644
--- ql/src/test/queries/clientpositive/vector_bucket.q
+++ ql/src/test/queries/clientpositive/vector_bucket.q
@@ -2,12 +2,13 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 set hive.support.concurrency=true;
+set hive.fetch.task.conversion=none;

 CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile;

-explain
+explain vectorization expression
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three');
 select a, b from non_orc_table order by a;

 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three');
 select a, b from non_orc_table order by a;
diff --git ql/src/test/queries/clientpositive/vector_cast_constant.q ql/src/test/queries/clientpositive/vector_cast_constant.q
index 94bee09..aac7f92 100644
--- ql/src/test/queries/clientpositive/vector_cast_constant.q
+++ ql/src/test/queries/clientpositive/vector_cast_constant.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-
+set hive.fetch.task.conversion=none;

 DROP TABLE over1k;
 DROP TABLE over1korc;
@@ -38,7 +38,7 @@ STORED AS ORC;

 INSERT INTO TABLE over1korc SELECT * FROM over1k;

-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q
index f1bb75b..5520ddd 100644
--- ql/src/test/queries/clientpositive/vector_char_2.q
+++ ql/src/test/queries/clientpositive/vector_char_2.q
@@ -1,6 +1,8 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 drop table char_2;

 create table char_2 (
@@ -16,7 +18,7 @@ group by value
 order by value asc
 limit 5;

-explain select value, sum(cast(key as int)), count(*) numrows
+explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
@@ -35,7 +37,7 @@ group by value
 order by value desc
 limit 5;

-explain select value, sum(cast(key as int)), count(*) numrows
+explain vectorization expression select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value desc
diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q
index 06f1d2b..edc59cd 100644
--- ql/src/test/queries/clientpositive/vector_char_4.q
+++ ql/src/test/queries/clientpositive/vector_char_4.q
@@ -1,5 +1,6 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 drop table if exists vectortab2k;
 drop table if exists vectortab2korc;
@@ -44,7 +45,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
 drop table if exists char_lazy_binary_columnar;
 create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile;

-explain
+explain vectorization expression
 insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;

 -- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;
diff --git ql/src/test/queries/clientpositive/vector_char_cast.q ql/src/test/queries/clientpositive/vector_char_cast.q
index bc78d51..c7d3c3c 100644
--- ql/src/test/queries/clientpositive/vector_char_cast.q
+++ ql/src/test/queries/clientpositive/vector_char_cast.q
@@ -1,3 +1,5 @@
+set hive.fetch.task.conversion=none;
+
 create table s1(id smallint) stored as orc;

 insert into table s1 values (1000),(1001),(1002),(1003),(1000);
diff --git ql/src/test/queries/clientpositive/vector_char_mapjoin1.q ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
index 58a73be..f5c05a4 100644
--- ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
+++ ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
@@ -37,21 +37,21 @@ create table char_join1_vc2_orc stored as orc as select * from char_join1_vc2;
 create table char_join1_str_orc stored as orc as select * from char_join1_str;

 -- Join char with same length char
-explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;

 -- SORT_QUERY_RESULTS

 select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;

 -- Join char with different length char
-explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;

 -- SORT_QUERY_RESULTS

 select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;

 -- Join char with string
-explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1;

 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q
index a921140..c315241 100644
--- ql/src/test/queries/clientpositive/vector_char_simple.q
+++ ql/src/test/queries/clientpositive/vector_char_simple.q
@@ -1,5 +1,7 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 drop table char_2;

 create table char_2 (
@@ -14,7 +16,7 @@ from src
 order by key asc
 limit 5;

-explain select key, value
+explain vectorization only select key, value
 from char_2
 order by key asc
 limit 5;
@@ -30,7 +32,7 @@ from src
 order by key desc
 limit 5;

-explain select key, value
+explain vectorization only select key, value
 from char_2
 order by key desc
 limit 5;
@@ -49,7 +51,7 @@ create table char_3 (
   field char(12)
 ) stored as orc;

-explain
+explain vectorization only operator
 insert into table char_3 select cint from alltypesorc limit 10;

 insert into table char_3 select cint from alltypesorc limit 10;
diff --git ql/src/test/queries/clientpositive/vector_coalesce.q ql/src/test/queries/clientpositive/vector_coalesce.q
index cfba7be..11296e1 100644
--- ql/src/test/queries/clientpositive/vector_coalesce.q
+++ ql/src/test/queries/clientpositive/vector_coalesce.q
@@ -1,10 +1,11 @@
 set hive.stats.fetch.column.stats=true;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
@@ -16,7 +17,7 @@ WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10;

-EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c
 FROM alltypesorc
 WHERE (ctinyint IS NULL)
 ORDER BY ctinyint, cdouble, cint, c
@@ -28,7 +29,7 @@ WHERE (ctinyint IS NULL)
 ORDER BY ctinyint, cdouble, cint, c
 LIMIT 10;

-EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c
 FROM alltypesorc
 WHERE (cfloat IS NULL AND cbigint IS NULL)
 ORDER BY cfloat, cbigint, c
@@ -40,7 +41,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL)
 ORDER BY cfloat, cbigint, c
 LIMIT 10;

-EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c
 FROM alltypesorc
 WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL
 ORDER BY ctimestamp1, ctimestamp2, c
@@ -52,7 +53,7 @@ WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL
 ORDER BY ctimestamp1, ctimestamp2, c
 LIMIT 10;

-EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c
 FROM alltypesorc
 WHERE (cfloat IS NULL AND cbigint IS NULL)
 ORDER BY cfloat, cbigint, c
@@ -64,7 +65,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL)
 ORDER BY cfloat, cbigint, c
 LIMIT 10;

-EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
 FROM alltypesorc
 WHERE cbigint IS NULL
 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_coalesce_2.q ql/src/test/queries/clientpositive/vector_coalesce_2.q
index c847e20..ea45ddd 100644
--- ql/src/test/queries/clientpositive/vector_coalesce_2.q
+++ ql/src/test/queries/clientpositive/vector_coalesce_2.q
@@ -7,7 +7,7 @@ create table str_str_orc (str1 string, str2 string) stored as orc;

 insert into table str_str_orc values (null, "X"), ("0", "X"), ("1", "X"), (null, "y");

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
@@ -18,7 +18,7 @@ SELECT
 from str_str_orc
 GROUP BY str2;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 COALESCE(str1, 0) as result
 from str_str_orc;
@@ -27,7 +27,7 @@ from str_str_orc;

 SET hive.vectorized.execution.enabled=true;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
@@ -38,7 +38,7 @@ SELECT
 from str_str_orc
 GROUP BY str2;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 COALESCE(str1, 0) as result
 from str_str_orc;
diff --git ql/src/test/queries/clientpositive/vector_complex_all.q ql/src/test/queries/clientpositive/vector_complex_all.q
index 91a7368..bd1f4a9 100644
--- ql/src/test/queries/clientpositive/vector_complex_all.q
+++ ql/src/test/queries/clientpositive/vector_complex_all.q
@@ -27,19 +27,19 @@ CREATE TABLE orc_create_complex (
 INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM orc_create_staging;

 -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT * FROM orc_create_complex;

 SELECT * FROM orc_create_complex;

 -- However, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT COUNT(*) FROM orc_create_complex;

 SELECT COUNT(*) FROM orc_create_complex;

 -- Also, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT str FROM orc_create_complex ORDER BY str;

 SELECT str FROM orc_create_complex ORDER BY str;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_complex_join.q ql/src/test/queries/clientpositive/vector_complex_join.q
index 30f38b1..c6926cb 100644
--- ql/src/test/queries/clientpositive/vector_complex_join.q
+++ ql/src/test/queries/clientpositive/vector_complex_join.q
@@ -10,7 +10,7 @@ set hive.fetch.task.conversion=none;
 CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
 INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;

-explain
+explain vectorization expression
 select * from alltypesorc join test where alltypesorc.cint=test.a;

 select * from alltypesorc join test where alltypesorc.cint=test.a;
@@ -23,7 +23,7 @@ INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1;
 CREATE TABLE test2b (a INT) STORED AS ORC;
 INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);

-explain
+explain vectorization expression
 select * from test2b join test2a on test2b.a = test2a.a[1];

 select * from test2b join test2a on test2b.a = test2a.a[1];
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_count.q ql/src/test/queries/clientpositive/vector_count.q
index 341db74..74b24cc 100644
--- ql/src/test/queries/clientpositive/vector_count.q
+++ ql/src/test/queries/clientpositive/vector_count.q
@@ -12,15 +12,15 @@ create table abcd stored as orc as select * from abcd_txt;
 select * from abcd;

 set hive.map.aggr=true;
-explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
+explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
 select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;

-explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
+explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
 select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;

 set hive.map.aggr=false;
-explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
+explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
 select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;

-explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
+explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
 select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
diff --git ql/src/test/queries/clientpositive/vector_count_distinct.q ql/src/test/queries/clientpositive/vector_count_distinct.q
index ec72079..72ca3fa 100644
--- ql/src/test/queries/clientpositive/vector_count_distinct.q
+++ ql/src/test/queries/clientpositive/vector_count_distinct.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 create table web_sales_txt
 (
@@ -104,7 +105,7 @@ select ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk,

 ------------------------------------------------------------------------------------------

-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales;

 select count(distinct ws_order_number) from web_sales;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_data_types.q ql/src/test/queries/clientpositive/vector_data_types.q
index c7e0d1b..d3ee19b 100644
--- ql/src/test/queries/clientpositive/vector_data_types.q
+++ ql/src/test/queries/clientpositive/vector_data_types.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;

 DROP TABLE over1k;
 DROP TABLE over1korc;
@@ -38,7 +39,7 @@ INSERT INTO TABLE over1korc SELECT * FROM over1k;

 SET hive.vectorized.execution.enabled=false;

-EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;
+EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;

 SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;

@@ -47,7 +48,7 @@ FROM (SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, s

 SET hive.vectorized.execution.enabled=true;

-EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;
+EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;

 SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20;
diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q
index 072ed5c..7e8768c 100644
--- ql/src/test/queries/clientpositive/vector_date_1.q
+++ ql/src/test/queries/clientpositive/vector_date_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 drop table if exists vector_date_1;
 create table vector_date_1 (dt1 date, dt2 date) stored as orc;
diff --git ql/src/test/queries/clientpositive/vector_decimal_1.q ql/src/test/queries/clientpositive/vector_decimal_1.q
index 8a1503f..e797892 100644
--- ql/src/test/queries/clientpositive/vector_decimal_1.q
+++ ql/src/test/queries/clientpositive/vector_decimal_1.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 drop table if exists decimal_1;
diff --git ql/src/test/queries/clientpositive/vector_decimal_10_0.q ql/src/test/queries/clientpositive/vector_decimal_10_0.q
index 596b2bd..14650f9 100644
--- ql/src/test/queries/clientpositive/vector_decimal_10_0.q
+++ ql/src/test/queries/clientpositive/vector_decimal_10_0.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS decimal_txt;
 DROP TABLE IF EXISTS `decimal`;
diff --git ql/src/test/queries/clientpositive/vector_decimal_2.q ql/src/test/queries/clientpositive/vector_decimal_2.q
index f1477ce..e00fefe 100644
--- ql/src/test/queries/clientpositive/vector_decimal_2.q
+++ ql/src/test/queries/clientpositive/vector_decimal_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 drop table decimal_2;
diff --git ql/src/test/queries/clientpositive/vector_decimal_3.q ql/src/test/queries/clientpositive/vector_decimal_3.q
index 9fa5d6f..c23a652 100644
--- ql/src/test/queries/clientpositive/vector_decimal_3.q
+++ ql/src/test/queries/clientpositive/vector_decimal_3.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_3_txt;
 DROP TABLE IF EXISTS DECIMAL_3;
diff --git ql/src/test/queries/clientpositive/vector_decimal_4.q ql/src/test/queries/clientpositive/vector_decimal_4.q
index 29c9875..0c34074 100644
--- ql/src/test/queries/clientpositive/vector_decimal_4.q
+++ ql/src/test/queries/clientpositive/vector_decimal_4.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_4_1;
 DROP TABLE IF EXISTS DECIMAL_4_2;
diff --git ql/src/test/queries/clientpositive/vector_decimal_5.q ql/src/test/queries/clientpositive/vector_decimal_5.q
index 7cf604d..f5de13b 100644
--- ql/src/test/queries/clientpositive/vector_decimal_5.q
+++ ql/src/test/queries/clientpositive/vector_decimal_5.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_5_txt;
 DROP TABLE IF EXISTS DECIMAL_5;
diff --git ql/src/test/queries/clientpositive/vector_decimal_6.q ql/src/test/queries/clientpositive/vector_decimal_6.q
index 1d0c3ae..fe145e6 100644
--- ql/src/test/queries/clientpositive/vector_decimal_6.q
+++ ql/src/test/queries/clientpositive/vector_decimal_6.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_6_1_txt;
 DROP TABLE IF EXISTS DECIMAL_6_1;
diff --git ql/src/test/queries/clientpositive/vector_decimal_aggregate.q ql/src/test/queries/clientpositive/vector_decimal_aggregate.q
index 552a564..843b57e 100644
--- ql/src/test/queries/clientpositive/vector_decimal_aggregate.q
+++ ql/src/test/queries/clientpositive/vector_decimal_aggregate.q
@@ -1,4 +1,6 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE decimal_vgby STORED AS ORC AS
 SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
 CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
@@ -10,7 +12,7 @@ SET hive.vectorized.execution.enabled=true;

 -- SORT_QUERY_RESULTS

 -- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
 FROM decimal_vgby
@@ -24,7 +26,7 @@ SELECT cint,
 HAVING COUNT(*) > 1;

 -- Now add the others...
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
 COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
 COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
 FROM decimal_vgby
diff --git ql/src/test/queries/clientpositive/vector_decimal_cast.q ql/src/test/queries/clientpositive/vector_decimal_cast.q
index eb0e75c..fc8861e 100644
--- ql/src/test/queries/clientpositive/vector_decimal_cast.q
+++ ql/src/test/queries/clientpositive/vector_decimal_cast.q
@@ -1,6 +1,7 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

-EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10;

 SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_decimal_expressions.q ql/src/test/queries/clientpositive/vector_decimal_expressions.q
index 33d0747..864e552 100644
--- ql/src/test/queries/clientpositive/vector_decimal_expressions.q
+++ ql/src/test/queries/clientpositive/vector_decimal_expressions.q
@@ -1,11 +1,12 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

 CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc;

 SET hive.vectorized.execution.enabled=true;

-EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
index 3007239..dac0317 100644
--- ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
@@ -4,6 +4,7 @@ SET hive.auto.convert.join=true;
 SET hive.auto.convert.join.noconditionaltask=true;
 SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 CREATE TABLE over1k(t tinyint,
 si smallint,
@@ -26,7 +27,7 @@ INSERT INTO TABLE t1 select dec from over1k;
 CREATE TABLE t2(dec decimal(4,0)) STORED AS ORC;
 INSERT INTO TABLE t2 select dec from over1k;

-explain
+explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec);

 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
index 4ebde6a..08e1e0f 100644
--- ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
+++ ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q
@@ -1,11 +1,13 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;

 CREATE TABLE decimal_test STORED AS ORC AS SELECT cbigint, cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc;

 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 -- Test math functions in vectorized mode to verify they run correctly end-to-end.

-explain
+explain vectorization expression
 select
 cdecimal1
 ,Round(cdecimal1, 2)
diff --git ql/src/test/queries/clientpositive/vector_decimal_precision.q ql/src/test/queries/clientpositive/vector_decimal_precision.q
index cc3fb63..97616f6 100644
--- ql/src/test/queries/clientpositive/vector_decimal_precision.q
+++ ql/src/test/queries/clientpositive/vector_decimal_precision.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_PRECISION_txt;
 DROP TABLE IF EXISTS DECIMAL_PRECISION;
@@ -26,7 +26,7 @@ SELECT dec, dec / 9 FROM DECIMAL_PRECISION ORDER BY dec;
 SELECT dec, dec / 27 FROM DECIMAL_PRECISION ORDER BY dec;
 SELECT dec, dec * dec FROM DECIMAL_PRECISION ORDER BY dec;

-EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;
+EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;
 SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION;

 SELECT dec * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1;
diff --git ql/src/test/queries/clientpositive/vector_decimal_round.q ql/src/test/queries/clientpositive/vector_decimal_round.q
index bf83163..ba20fef 100644
--- ql/src/test/queries/clientpositive/vector_decimal_round.q
+++ ql/src/test/queries/clientpositive/vector_decimal_round.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 create table decimal_tbl_txt (dec decimal(10,0))
 ROW FORMAT DELIMITED
@@ -12,12 +12,12 @@ insert into table decimal_tbl_txt values(101);

 select * from decimal_tbl_txt;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by dec;

 select dec, round(dec, -1) from decimal_tbl_txt order by dec;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1);

 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1);
@@ -29,12 +29,12 @@ insert into table decimal_tbl_rc values(101);

 select * from decimal_tbl_rc;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by dec;

 select dec, round(dec, -1) from decimal_tbl_rc order by dec;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1);

 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1);
@@ -46,12 +46,12 @@ insert into table decimal_tbl_orc values(101);

 select * from decimal_tbl_orc;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by dec;

 select dec, round(dec, -1) from decimal_tbl_orc order by dec;

-explain
+explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1);

 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1);
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_decimal_round_2.q ql/src/test/queries/clientpositive/vector_decimal_round_2.q
index 0020325..7afc780 100644
--- ql/src/test/queries/clientpositive/vector_decimal_round_2.q
+++ ql/src/test/queries/clientpositive/vector_decimal_round_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 create table decimal_tbl_1_orc (dec decimal(38,18))
 STORED AS ORC;
@@ -19,7 +19,7 @@ select * from decimal_tbl_1_orc;
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
 round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -39,7 +39,7 @@ insert into table decimal_tbl_2_orc values(125.315, -125.315);

 select * from decimal_tbl_2_orc;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(pos) as p, round(pos, 0),
 round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -65,7 +65,7 @@ insert into table decimal_tbl_3_orc values(3.141592653589793);

 select * from decimal_tbl_3_orc;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(dec, -15) as d, round(dec, -16),
 round(dec, -13), round(dec, -14),
@@ -113,7 +113,7 @@ insert into table decimal_tbl_4_orc values(1809242.3151111344, -1809242.31511113

 select * from decimal_tbl_4_orc;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
 round(pos, 9) as p, round(neg, 9),
 round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
 FROM decimal_tbl_4_orc ORDER BY p;
diff --git ql/src/test/queries/clientpositive/vector_decimal_trailing.q ql/src/test/queries/clientpositive/vector_decimal_trailing.q
index 5f13396..40935aa 100644
--- ql/src/test/queries/clientpositive/vector_decimal_trailing.q
+++ ql/src/test/queries/clientpositive/vector_decimal_trailing.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_TRAILING_txt;
 DROP TABLE IF EXISTS DECIMAL_TRAILING;
diff --git ql/src/test/queries/clientpositive/vector_decimal_udf.q ql/src/test/queries/clientpositive/vector_decimal_udf.q
index 416d348..3632b59 100644
--- ql/src/test/queries/clientpositive/vector_decimal_udf.q
+++ ql/src/test/queries/clientpositive/vector_decimal_udf.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_UDF_txt;
 DROP TABLE IF EXISTS DECIMAL_UDF;
@@ -19,80 +19,80 @@ STORED AS ORC;
 INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt;

 -- addition
-EXPLAIN SELECT key + key FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + key FROM DECIMAL_UDF;
 SELECT key + key FROM DECIMAL_UDF;

-EXPLAIN SELECT key + value FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + value FROM DECIMAL_UDF;
 SELECT key + value FROM DECIMAL_UDF;

-EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + (value/2) FROM DECIMAL_UDF;
 SELECT key + (value/2) FROM DECIMAL_UDF;

-EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + '1.0' FROM DECIMAL_UDF;
 SELECT key + '1.0' FROM DECIMAL_UDF;

 -- substraction
-EXPLAIN SELECT key - key FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - key FROM DECIMAL_UDF;
 SELECT key - key FROM DECIMAL_UDF;

-EXPLAIN SELECT key - value FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - value FROM DECIMAL_UDF;
 SELECT key - value FROM DECIMAL_UDF;

-EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - (value/2) FROM DECIMAL_UDF;
 SELECT key - (value/2) FROM DECIMAL_UDF;


-EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - '1.0' FROM DECIMAL_UDF;
 SELECT key - '1.0' FROM DECIMAL_UDF;

 -- multiplication
-EXPLAIN SELECT key * key FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * key FROM DECIMAL_UDF;
 SELECT key * key FROM DECIMAL_UDF;

-EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key, value FROM DECIMAL_UDF where key * value > 0;
 SELECT key, value FROM DECIMAL_UDF where key * value > 0;

-EXPLAIN SELECT key * value FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * value FROM DECIMAL_UDF;
 SELECT key * value FROM DECIMAL_UDF;

-EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * (value/2) FROM DECIMAL_UDF;
 SELECT key * (value/2) FROM DECIMAL_UDF;

-EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * '2.0' FROM DECIMAL_UDF;
 SELECT key * '2.0' FROM DECIMAL_UDF;

 -- division
-EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / 0 FROM DECIMAL_UDF limit 1;
 SELECT key / 0 FROM DECIMAL_UDF limit 1;

-EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / NULL FROM DECIMAL_UDF limit 1;
 SELECT key / NULL FROM DECIMAL_UDF limit 1;

-EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0;
 SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0;

-EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0;
 SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0;

-EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0;
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0;
 SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0;

-EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF;
 SELECT 1 + (key / '2.0') FROM DECIMAL_UDF;

 -- abs
-EXPLAIN SELECT abs(key) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT abs(key) FROM DECIMAL_UDF;
 SELECT abs(key) FROM DECIMAL_UDF;

 -- avg
-EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value;
+EXPLAIN VECTORIZATION EXPRESSION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value;
 SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value;

 -- negative
-EXPLAIN SELECT -key FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT -key FROM DECIMAL_UDF;
 SELECT -key FROM DECIMAL_UDF;

 -- positive
-EXPLAIN SELECT +key FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT +key FROM DECIMAL_UDF;
 SELECT +key FROM DECIMAL_UDF;

 -- ceiling
@@ -100,43 +100,43 @@ EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF;
 SELECT CEIL(key) FROM DECIMAL_UDF;

 -- floor
-EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT FLOOR(key) FROM DECIMAL_UDF;
 SELECT FLOOR(key) FROM DECIMAL_UDF;

 -- round
-EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT ROUND(key, 2) FROM DECIMAL_UDF;
 SELECT ROUND(key, 2) FROM DECIMAL_UDF;

 -- power
-EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF;
 SELECT POWER(key, 2) FROM DECIMAL_UDF;

 -- modulo
-EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF;
 SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF;

 -- stddev, var
-EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value;
+EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value;
 SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value;

 -- stddev_samp, var_samp
-EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value;
+EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value;
 SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value;

 -- histogram
-EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF;
 SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF;

 -- min
-EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(key) FROM DECIMAL_UDF;
 SELECT MIN(key) FROM DECIMAL_UDF;

 -- max
-EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(key) FROM DECIMAL_UDF;
 SELECT MAX(key) FROM DECIMAL_UDF;

 -- count
-EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF;
+EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(key) FROM DECIMAL_UDF;
 SELECT COUNT(key) FROM DECIMAL_UDF;

 DROP TABLE IF EXISTS DECIMAL_UDF_txt;
diff --git ql/src/test/queries/clientpositive/vector_decimal_udf2.q ql/src/test/queries/clientpositive/vector_decimal_udf2.q
index 433f464..a013f1f 100644
--- ql/src/test/queries/clientpositive/vector_decimal_udf2.q
+++ ql/src/test/queries/clientpositive/vector_decimal_udf2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 DROP TABLE IF EXISTS DECIMAL_UDF2_txt;
 DROP TABLE IF EXISTS DECIMAL_UDF2;
@@ -18,14 +18,14 @@ STORED AS ORC;
 INSERT OVERWRITE TABLE DECIMAL_UDF2 SELECT * FROM DECIMAL_UDF2_txt;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
 FROM DECIMAL_UDF2 WHERE key = 10;

 SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
 FROM DECIMAL_UDF2 WHERE key = 10;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   exp(key), ln(key),
   log(key), log(key, key), log(key, value), log(value, key),
diff --git ql/src/test/queries/clientpositive/vector_distinct_2.q ql/src/test/queries/clientpositive/vector_distinct_2.q
index 509b262..4be23c1 100644
--- ql/src/test/queries/clientpositive/vector_distinct_2.q
+++ ql/src/test/queries/clientpositive/vector_distinct_2.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -41,7 +42,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select distinct s, t from vectortab2korc;

 select distinct s, t from vectortab2korc;
diff --git ql/src/test/queries/clientpositive/vector_elt.q ql/src/test/queries/clientpositive/vector_elt.q
index f44a3be..5e54cbb 100644
--- ql/src/test/queries/clientpositive/vector_elt.q
+++ ql/src/test/queries/clientpositive/vector_elt.q
@@ -1,8 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

-EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
 FROM alltypesorc
 WHERE ctinyint > 0 LIMIT 10;

@@ -10,7 +11,7 @@ SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cin
 FROM alltypesorc
 WHERE ctinyint > 0 LIMIT 10;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT elt(2, 'abc', 'defg'),
        elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
        elt('1', 'abc', 'defg'),
diff --git ql/src/test/queries/clientpositive/vector_empty_where.q ql/src/test/queries/clientpositive/vector_empty_where.q
index 0543a65..3e94c92 100644
--- ql/src/test/queries/clientpositive/vector_empty_where.q
+++ ql/src/test/queries/clientpositive/vector_empty_where.q
@@ -2,22 +2,22 @@ SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;

 -- HIVE-
-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cstring1;

 select count (distinct cint) from alltypesorc where cstring1;

-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cint;

 select count (distinct cint) from alltypesorc where cint;

-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where cfloat;

 select count (distinct cint) from alltypesorc where cfloat;

-explain
+explain vectorization expression
 select count (distinct cint) from alltypesorc where ctimestamp1;

 select count (distinct cint) from alltypesorc where ctimestamp1;
diff --git ql/src/test/queries/clientpositive/vector_groupby4.q ql/src/test/queries/clientpositive/vector_groupby4.q
index a59d1a8..1906c07 100644
--- ql/src/test/queries/clientpositive/vector_groupby4.q
+++ ql/src/test/queries/clientpositive/vector_groupby4.q
@@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;

 CREATE TABLE dest1(c1 STRING) STORED AS ORC;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1);
diff --git ql/src/test/queries/clientpositive/vector_groupby6.q ql/src/test/queries/clientpositive/vector_groupby6.q
index 89c7a19..cb01882 100644
--- ql/src/test/queries/clientpositive/vector_groupby6.q
+++ ql/src/test/queries/clientpositive/vector_groupby6.q
@@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;

 CREATE TABLE dest1(c1 STRING) STORED AS ORC;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1);
diff --git ql/src/test/queries/clientpositive/vector_groupby_3.q ql/src/test/queries/clientpositive/vector_groupby_3.q
index d42d7f1..299ee92 100644
--- ql/src/test/queries/clientpositive/vector_groupby_3.q
+++ ql/src/test/queries/clientpositive/vector_groupby_3.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -41,7 +42,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select s, t, max(b)
 from vectortab2korc
 group by s, t;
diff --git ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
index 53df2aa..c692182 100644
--- ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
@@ -1,5 +1,5 @@
 set hive.mapred.mode=nonstrict;
-set hive.explain.user=true;
+set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
@@ -8,7 +8,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 set hive.exec.dynamic.partition.mode=nonstrict;

 -- HIVE-12738
 -- We are checking if a MapJoin after a GroupBy will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce.q ql/src/test/queries/clientpositive/vector_groupby_reduce.q
index e78b57f..bafb32e 100644
--- ql/src/test/queries/clientpositive/vector_groupby_reduce.q
+++ ql/src/test/queries/clientpositive/vector_groupby_reduce.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 create table store_sales_txt
 (
@@ -91,7 +92,7 @@ ss_sold_date_sk
 ,ss_net_profit
 from store_sales_txt;

-explain
+explain vectorization expression
 select
     ss_ticket_number
 from
@@ -108,7 +109,7 @@ limit 20;


-explain
+explain vectorization expression
 select
     min(ss_ticket_number) m
 from
@@ -133,7 +134,7 @@ order by m;


-explain
+explain vectorization expression
 select
     ss_ticket_number, sum(ss_item_sk), sum(q)
 from
@@ -157,7 +158,7 @@ group by ss_ticket_number
 order by ss_ticket_number;

-explain
+explain vectorization expression
 select
     ss_ticket_number, ss_item_sk, sum(q)
 from
diff --git ql/src/test/queries/clientpositive/vector_grouping_sets.q ql/src/test/queries/clientpositive/vector_grouping_sets.q
index 09ba6b6..1b5d3a0 100644
--- ql/src/test/queries/clientpositive/vector_grouping_sets.q
+++ ql/src/test/queries/clientpositive/vector_grouping_sets.q
@@ -45,7 +45,7 @@ create table store
 stored as orc
 as select * from store_txt;

-explain
+explain vectorization expression
 select s_store_id
 from store
 group by s_store_id with rollup;
@@ -54,7 +54,7 @@ select s_store_id
 from store
 group by s_store_id with rollup;

-explain
+explain vectorization expression
 select s_store_id, GROUPING__ID
 from store
 group by s_store_id with rollup;
diff --git ql/src/test/queries/clientpositive/vector_if_expr.q ql/src/test/queries/clientpositive/vector_if_expr.q
index 475cecf..6e7ccf7 100644
--- ql/src/test/queries/clientpositive/vector_if_expr.q
+++ ql/src/test/queries/clientpositive/vector_if_expr.q
@@ -1,9 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1;

 SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 LIMIT 5;
diff --git ql/src/test/queries/clientpositive/vector_include_no_sel.q ql/src/test/queries/clientpositive/vector_include_no_sel.q
index 03f676b..a499ae5 100644
--- ql/src/test/queries/clientpositive/vector_include_no_sel.q
+++ ql/src/test/queries/clientpositive/vector_include_no_sel.q
@@ -6,6 +6,7 @@ SET hive.auto.convert.join=true;
 SET hive.auto.convert.join.noconditionaltask=true;
 SET hive.auto.convert.join.noconditionaltask.size=1000000000;
 SET hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;

 -- HIVE-13872
 -- Looking for TableScan immediately followed by ReduceSink (no intervening SEL operator).
@@ -69,7 +70,7 @@ LOAD DATA LOCAL INPATH '../../data/files/customer_demographics.txt' OVERWRITE IN

 create table customer_demographics stored as orc as select * from customer_demographics_txt;

-explain
+explain vectorization expression
 select count(1) from customer_demographics,store_sales
 where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or
       (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U'));
diff --git ql/src/test/queries/clientpositive/vector_inner_join.q ql/src/test/queries/clientpositive/vector_inner_join.q
index 24b66bf..54194a8 100644
--- ql/src/test/queries/clientpositive/vector_inner_join.q
+++ ql/src/test/queries/clientpositive/vector_inner_join.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;

 CREATE TABLE orc_table_1a(a INT) STORED AS ORC;
 CREATE TABLE orc_table_2a(c INT) STORED AS ORC;
@@ -9,12 +10,12 @@ CREATE TABLE orc_table_2a(c INT) STORED AS ORC;
 insert into table orc_table_1a values(1),(1), (2),(3);
 insert into table orc_table_2a values(0),(2), (3),(null),(4);

-explain
+explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2;

 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2;

 select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2;
@@ -26,38 +27,38 @@ CREATE TABLE orc_table_2b(c INT, v2 STRING) STORED AS ORC;
 insert into table orc_table_1b values("one", 1),("one", 1), ("two", 2),("three", 3);
 insert into table orc_table_2b values(0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, ""),(4, "FOUR");

-explain
+explain vectorization detail
 select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

 select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

 select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

 select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

 select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;

 select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;

-explain
+explain vectorization detail
 select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;

 select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2;
diff --git ql/src/test/queries/clientpositive/vector_interval_1.q ql/src/test/queries/clientpositive/vector_interval_1.q
index 8fefe41..f4f0024 100644
--- ql/src/test/queries/clientpositive/vector_interval_1.q
+++ ql/src/test/queries/clientpositive/vector_interval_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 drop table if exists vector_interval_1;
 create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc;
@@ -13,7 +12,7 @@ insert into vector_interval_1
 select null, null, null, null from src limit 1;

 -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -28,7 +27,7 @@ from vector_interval_1 order by str1;

 -- interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -49,7 +48,7 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt;

-explain
+explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -72,7 +71,7 @@ from vector_interval_1 order by dt;

 -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -107,7 +106,7 @@ from vector_interval_1 order by dt;

 -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -142,7 +141,7 @@ from vector_interval_1 order by ts;

 -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -159,7 +158,7 @@ from vector_interval_1 order by ts;

 -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -176,7 +175,7 @@ from vector_interval_1 order by dt;

 -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,
diff --git ql/src/test/queries/clientpositive/vector_interval_2.q ql/src/test/queries/clientpositive/vector_interval_2.q
index 5afb511..0b78a4b 100644
--- ql/src/test/queries/clientpositive/vector_interval_2.q
+++ ql/src/test/queries/clientpositive/vector_interval_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 drop table if exists vector_interval_2;
 create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc;
@@ -14,7 +14,7 @@ insert into vector_interval_2

 -- interval comparisons in select clause

-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -77,7 +77,7 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1;

-explain
+explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -128,7 +128,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1;

-explain
+explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -191,7 +191,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3;

-explain
+explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -244,7 +244,7 @@ from vector_interval_2 order by str3;

 -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -293,7 +293,7 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts;

-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -342,7 +342,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts;

-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -381,7 +381,7 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts;

-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -431,7 +431,7 @@ where
 order by ts;

 -- day to second expressions in predicate
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second
@@ -480,7 +480,7 @@ where
   and ts > dt - interval '0 1:2:4' day to second
 order by ts;

-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = ts + interval '0' day
diff --git ql/src/test/queries/clientpositive/vector_interval_arithmetic.q ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
index 40c4c03..9a551e8 100644
--- ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
+++ ql/src/test/queries/clientpositive/vector_interval_arithmetic.q
@@ -13,7 +13,7 @@ insert overwrite table interval_arithmetic_1
 SET hive.vectorized.execution.enabled=true;

 -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -36,7 +36,7 @@ select
 from interval_arithmetic_1
 order by dateval;

-explain
+explain vectorization expression
 select
   dateval,
   dateval - date '1999-06-07',
@@ -53,7 +53,7 @@ select
 from interval_arithmetic_1
 order by dateval;

-explain
+explain vectorization expression
 select
   tsval,
   tsval - interval '2-2' year to month,
@@ -76,7 +76,7 @@ select
 from interval_arithmetic_1
 order by tsval;

-explain
+explain vectorization expression
 select
   interval '2-2' year to month + interval '3-3' year to month,
   interval '2-2' year to month - interval '3-3' year to month
@@ -93,7 +93,7 @@ limit 2;

 -- interval day-time arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '99 11:22:33.123456789' day to second,
@@ -116,7 +116,7 @@ select
 from interval_arithmetic_1
 order by dateval;

-explain
+explain vectorization expression
 select
   dateval,
   tsval,
@@ -135,7 +135,7 @@ select
 from interval_arithmetic_1
 order by dateval;

-explain
+explain vectorization expression
 select
   tsval,
   tsval - interval '99 11:22:33.123456789' day to second,
@@ -158,7 +158,7 @@ select
 from interval_arithmetic_1
 order by tsval;

-explain
+explain vectorization expression
 select
   interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second,
   interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second
diff --git ql/src/test/queries/clientpositive/vector_interval_mapjoin.q ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
index 36ccd35..d27e67b 100644
--- ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;

 create table vectortab_a_1k(
 t tinyint,
@@ -45,7 +46,7 @@ LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE ve

 CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k;

-explain
+explain vectorization expression
 select
    v1.s,
    v2.s,
diff --git ql/src/test/queries/clientpositive/vector_join.q ql/src/test/queries/clientpositive/vector_join.q
index 9238a6e..b086a13 100644
--- ql/src/test/queries/clientpositive/vector_join.q
+++ ql/src/test/queries/clientpositive/vector_join.q
@@ -32,6 +32,7 @@ TBLPROPERTIES (
 set hive.auto.convert.join=false;
 set hive.vectorized.execution.enabled = true;
 set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;

 SELECT cr.id1 , cr.id2
 FROM
diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q
index 1467cd3..6557a71 100644
--- ql/src/test/queries/clientpositive/vector_join30.q
+++ ql/src/test/queries/clientpositive/vector_join30.q
@@ -10,7 +10,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000;

 CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -25,7 +25,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -40,7 +40,7 @@ LEFT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
@@ -55,7 +55,7 @@ RIGHT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -76,7 +76,7 @@ JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -97,7 +97,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -118,7 +118,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 LEFT OUTER JOIN
@@ -139,7 +139,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));

-explain
+explain vectorization expression
 FROM
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 RIGHT OUTER JOIN
diff --git ql/src/test/queries/clientpositive/vector_join_part_col_char.q ql/src/test/queries/clientpositive/vector_join_part_col_char.q
index 5cfce37..e625a64 100644
--- ql/src/test/queries/clientpositive/vector_join_part_col_char.q
+++ ql/src/test/queries/clientpositive/vector_join_part_col_char.q
@@ -4,6 +4,7 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 set hive.vectorized.execution.enabled=true;
 set hive.explain.user=true;
 set hive.metastore.fastpath=false;
+set hive.fetch.task.conversion=none;

 drop table if exists char_part_tbl1 ;
 drop table if exists char_part_tbl2;
@@ -22,7 +23,7 @@ insert into table char_tbl2 partition(gpa='3') select name, age from studenttab
 show partitions char_tbl1;
 show partitions char_tbl2;

-explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
+explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);
 select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);

 set hive.vectorized.execution.enabled=false;
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join.q ql/src/test/queries/clientpositive/vector_left_outer_join.q
index ddf2660..0684f12 100644
--- ql/src/test/queries/clientpositive/vector_left_outer_join.q
+++ ql/src/test/queries/clientpositive/vector_left_outer_join.q
@@ -3,7 +3,9 @@ set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
 set hive.auto.convert.join=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
-explain
+set hive.fetch.task.conversion=none;
+
+explain vectorization
 select count(*) from (select c.ctinyint
 from alltypesorc c
 left outer join alltypesorc cd
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 5da5d50..ccceb36 100644
--- ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -20,14 +20,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE;

 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );

 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -36,7 +36,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -44,7 +44,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -52,7 +52,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
@@ -60,7 +60,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
-explain
+explain vectorization expression
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
index dfb8405..8469a06 100644
--- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
@@ -26,381 +26,502 @@ select * from t4;

 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=false;

-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization only summary
+
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization only summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 set hive.vectorized.execution.enabled=false;
 set hive.mapjoin.hybridgrace.hashtable=true;

-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;

-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization only operator
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only operator
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only operator
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only operator
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only operator
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only operator
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;

-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;

-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;

-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;

-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;

-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;

-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;

-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;

-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;

-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;

-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;

 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;

-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;

-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain
vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by 
a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; 
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ 
mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; diff --git ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q index c9e9e48..d960559 100644 --- ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q +++ ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -9,7 +10,7 @@ SET hive.auto.convert.join=true; -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ 
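Note on the EXPLAIN syntax exercised throughout this patch: HIVE-11394 extends EXPLAIN with VECTORIZATION [ONLY] [SUMMARY|OPERATOR|EXPRESSION|DETAIL]. Each level includes the previous one, and ONLY suppresses the ordinary plan sections so only the vectorization report remains. A minimal sketch, assuming the t1 and t2 tables created earlier in this test (these queries are illustrative, not hunks of the patch):

-- SUMMARY (the default) reports per-vertex whether vectorization is enabled and used.
explain vectorization summary
select * from t1 a left semi join t2 b on a.key = b.key;

-- ONLY OPERATOR drops the regular plan output and adds per-operator vectorization info.
explain vectorization only operator
select * from t1 a left semi join t2 b on a.key = b.key;

-- DETAIL additionally names the vector expression classes chosen for each expression.
explain vectorization detail
select * from t1 a left semi join t2 b on a.key = b.key;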
-23,7 +24,7 @@ where li.l_linenumber = 1 and ; -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and diff --git ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q index 1f17669..22830a5 100644 --- ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q +++ ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q @@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; create table date_dim ( @@ -104,7 +105,7 @@ stored as orc; -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, diff --git ql/src/test/queries/clientpositive/vector_multi_insert.q ql/src/test/queries/clientpositive/vector_multi_insert.q index 374a0da..c56ee1c 100644 --- ql/src/test/queries/clientpositive/vector_multi_insert.q +++ ql/src/test/queries/clientpositive/vector_multi_insert.q @@ -1,6 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=minimal; +set hive.fetch.task.conversion=none; create table orc1 stored as orc @@ -22,7 +22,7 @@ create table orc_rn3 (rn int); analyze table orc1 compute statistics; -explain from orc1 a +explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000; diff --git ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q index 69142bf..113ea7f 100644 --- ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q +++ ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q @@ -1,4 +1,4 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; -explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint); \ No newline at end of file +explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q index bffc93e..cf3e765 100644 --- ql/src/test/queries/clientpositive/vector_non_string_partition.q +++ ql/src/test/queries/clientpositive/vector_non_string_partition.q @@ -4,16 +4,17 @@ SET hive.vectorized.execution.enabled=true; CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC; SET hive.exec.dynamic.partition.mode=nonstrict; SET hive.exec.dynamic.partition=true; +set hive.fetch.task.conversion=none; INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble; SHOW PARTITIONS non_string_part; -EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10; +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint 
diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q
index bffc93e..cf3e765 100644
--- ql/src/test/queries/clientpositive/vector_non_string_partition.q
+++ ql/src/test/queries/clientpositive/vector_non_string_partition.q
@@ -4,16 +4,17 @@ SET hive.vectorized.execution.enabled=true;
 CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC;
 SET hive.exec.dynamic.partition.mode=nonstrict;
 SET hive.exec.dynamic.partition=true;
+set hive.fetch.task.conversion=none;

 INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc
 WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble;

 SHOW PARTITIONS non_string_part;

-EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;

 SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;

-EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;

 SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_null_projection.q ql/src/test/queries/clientpositive/vector_null_projection.q
index 66c0838..711b8e7 100644
--- ql/src/test/queries/clientpositive/vector_null_projection.q
+++ ql/src/test/queries/clientpositive/vector_null_projection.q
@@ -10,12 +10,12 @@ insert into table a values('aaa');
 insert into table b values('aaa');

 -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
 select NULL from a;

 select NULL from a;

-explain
+explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b;

 select NULL as x from a union distinct select NULL as x from b;
\ No newline at end of file
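vector_null_projection.q above encodes a negative expectation: a bare NULL projection has void type, which the vectorizer does not handle, so the vertex should fall back to row mode with a stated reason rather than fail. Illustrative sketch (table a is created earlier in that test; the comments describe the expected behavior, not captured output):

explain vectorization expression
select NULL from a;
-- expect the Map work to report it is not vectorized, with a reason naming the void column type

select cast(NULL as int) from a;
-- a typed NULL sidesteps the void projection entirely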
diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q
index b316a54..6a7ff72 100644
--- ql/src/test/queries/clientpositive/vector_nullsafe_join.q
+++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q
@@ -1,6 +1,7 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS
 --
@@ -20,19 +21,19 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;

 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;

 -- outer joins
@@ -47,19 +48,19 @@ SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;

 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;

 -- outer joins
diff --git ql/src/test/queries/clientpositive/vector_number_compare_projection.q ql/src/test/queries/clientpositive/vector_number_compare_projection.q
index feb5e98..3f4f5aa 100644
--- ql/src/test/queries/clientpositive/vector_number_compare_projection.q
+++ ql/src/test/queries/clientpositive/vector_number_compare_projection.q
@@ -35,7 +35,7 @@ SET hive.vectorized.execution.enabled=true;
 --
 -- Projection LongColLongScalar
 --
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
     (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
         order by t, si, i) as q;
@@ -44,7 +44,7 @@ SELECT sum(hash(*)) FROM
     (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
         order by t, si, i) as q;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
     (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc
         order by t, si, i, b) as q;
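The <=> operator driving vector_nullsafe_join.q is Hive's null-safe equality: it returns true when both operands are NULL, where = would return NULL and drop the row. A small illustration (not part of the patch):

select 1 <=> 1,        -- true
       null <=> null,  -- true; compare: null = null evaluates to null
       1 <=> null      -- false
from myinput1 limit 1;

-- hence rows whose keys are NULL on both sides survive this join:
select * from myinput1 a join myinput1 b on a.key <=> b.value;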
diff --git ql/src/test/queries/clientpositive/vector_nvl.q ql/src/test/queries/clientpositive/vector_nvl.q
index 742bf52..e00e82f 100644
--- ql/src/test/queries/clientpositive/vector_nvl.q
+++ ql/src/test/queries/clientpositive/vector_nvl.q
@@ -1,7 +1,8 @@
 SET hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

-EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;
@@ -11,7 +12,7 @@ FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;

-EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;
@@ -19,7 +20,7 @@ SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;

-EXPLAIN SELECT nvl(null, 10) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;
@@ -27,7 +28,7 @@ SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;

-EXPLAIN SELECT nvl(null, null) as n
+EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n
 FROM alltypesorc
 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_orderby_5.q ql/src/test/queries/clientpositive/vector_orderby_5.q
index 30bcaef..17ccf82 100644
--- ql/src/test/queries/clientpositive/vector_orderby_5.q
+++ ql/src/test/queries/clientpositive/vector_orderby_5.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 create table vectortab2k(
     t tinyint,
@@ -39,7 +40,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select bo, max(b) from vectortab2korc group by bo order by bo desc;

 select bo, max(b) from vectortab2korc group by bo order by bo desc;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_outer_join0.q ql/src/test/queries/clientpositive/vector_outer_join0.q
index dce3a1b..d7586c7 100644
--- ql/src/test/queries/clientpositive/vector_outer_join0.q
+++ ql/src/test/queries/clientpositive/vector_outer_join0.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;

 CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC;
 CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC;
@@ -12,14 +13,14 @@ insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null,
 select * from orc_table_1;
 select * from orc_table_2;

-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;

 -- SORT_QUERY_RESULTS

 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;

-explain
+explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c;

 -- SORT_QUERY_RESULTS
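vector_nvl.q above checks that nvl(col, default), which returns the default wherever col is NULL, vectorizes end to end; the EXPRESSION level prints the vector expression selected for the projection. Illustrative usage:

select cdouble, nvl(cdouble, 100) as n
from alltypesorc
where cdouble is null
limit 10;  -- n is 100 on every returned row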
diff --git ql/src/test/queries/clientpositive/vector_outer_join1.q ql/src/test/queries/clientpositive/vector_outer_join1.q
index 4a36452..6cb2e45 100644
--- ql/src/test/queries/clientpositive/vector_outer_join1.q
+++ ql/src/test/queries/clientpositive/vector_outer_join1.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;

 -- Using cint and ctinyint in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;

 select * from small_alltypesorc_a;

-explain
+explain vectorization detail
 select *
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -41,7 +42,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
   on cd.cint = c.cint;

-explain
+explain vectorization detail
 select c.ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
@@ -54,7 +55,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
   on hd.ctinyint = c.ctinyint;

-explain
+explain vectorization detail
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join2.q ql/src/test/queries/clientpositive/vector_outer_join2.q
index d3b5805..da17806 100644
--- ql/src/test/queries/clientpositive/vector_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_outer_join2.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;

 -- Using cint and cbigint in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and cbigint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;

 select * from small_alltypesorc_a;

-explain
+explain vectorization detail
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join3.q ql/src/test/queries/clientpositive/vector_outer_join3.q
index e5fc0a9..3f28251 100644
--- ql/src/test/queries/clientpositive/vector_outer_join3.q
+++ ql/src/test/queries/clientpositive/vector_outer_join3.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;

 -- Using cint and cstring1 in test queries
 create table small_alltypesorc1a as select * from alltypesorc where cint is not null and cstring1 is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
@@ -27,7 +28,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS;
 ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;

 select * from small_alltypesorc_a;

-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -47,7 +48,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -67,7 +68,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
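The formatted keyword composes with the vectorization levels exactly as it does with plain EXPLAIN: the whole plan, vectorization report included, is emitted as JSON, which is what the outer-join golden files here capture. Illustrative form, assuming the small_alltypesorc_a table from these tests:

explain vectorization detail formatted
select count(*) from small_alltypesorc_a;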
diff --git ql/src/test/queries/clientpositive/vector_outer_join4.q ql/src/test/queries/clientpositive/vector_outer_join4.q
index 45461b5..d024687 100644
--- ql/src/test/queries/clientpositive/vector_outer_join4.q
+++ ql/src/test/queries/clientpositive/vector_outer_join4.q
@@ -2,6 +2,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;

 -- Using cint and ctinyint in test queries
 create table small_alltypesorc1b as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 10;
@@ -28,7 +29,7 @@ ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS;

 select * from small_alltypesorc_b;

-explain
+explain vectorization detail formatted
 select *
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
@@ -41,7 +42,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
   on cd.cint = c.cint;

-explain
+explain vectorization detail formatted
 select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
@@ -54,7 +55,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
   on hd.ctinyint = c.ctinyint;

-explain
+explain vectorization detail formatted
 select count(*) from (select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
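vector_outer_join5.q, next, pins hive.auto.convert.join.noconditionaltask.size=10000 so that its deliberately tiny build tables stay under the broadcast threshold and every join converts to a map join, the case the native vectorized MapJoin implements. A sketch of the setup it relies on (table names are the ones that file creates):

set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
explain vectorization summary
select count(*) from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint;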
diff --git ql/src/test/queries/clientpositive/vector_outer_join5.q ql/src/test/queries/clientpositive/vector_outer_join5.q
index 18b9ab4..b8e788a 100644
--- ql/src/test/queries/clientpositive/vector_outer_join5.q
+++ ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -5,6 +5,7 @@ SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.auto.convert.join=true;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -22,7 +23,7 @@ as orc as select ctinyint, cbigint from alltypesorc limit 100;
 ANALYZE TABLE small_table COMPUTE STATISTICS;
 ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -35,7 +36,7 @@ left outer join small_table st
   on s.ctinyint = st.ctinyint
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -48,7 +49,7 @@ left outer join small_table sm
   on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -61,7 +62,7 @@ left outer join small_table sm
   on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -74,7 +75,7 @@ left outer join small_table sm
   on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -105,7 +106,7 @@ as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc lim
 ANALYZE TABLE small_table2 COMPUTE STATISTICS;
 ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
@@ -118,7 +119,7 @@ left outer join small_table2 st
   on s.cmodtinyint = st.cmodtinyint
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -131,7 +132,7 @@ left outer join small_table2 sm
   on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -144,7 +145,7 @@ left outer join small_table2 sm
   on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -157,7 +158,7 @@ left outer join small_table2 sm
   on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
 ) t1;

-explain
+explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
diff --git ql/src/test/queries/clientpositive/vector_outer_join6.q ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..b39e8ed 100644
--- ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -3,6 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -28,14 +29,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;

-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;

 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;

-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
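vector_partition_diff_num_cols.q, next, probes schema evolution across partitions: partitions written before an ALTER TABLE ADD COLUMNS carry fewer physical columns, and a partition whose column type was changed is expected to block vectorization (the file states this expectation in a comment). A hypothetical sketch of the shape being tested, with an invented table name:

-- hypothetical: two partitions with different physical schemas
create table inv_demo (c1 int, c2 int) partitioned by (par string) stored as orc;
insert into table inv_demo partition(par='old') values (1, 2);
alter table inv_demo add columns (c3 string);
insert into table inv_demo partition(par='new') values (3, 4, 'x');
explain vectorization summary
select sum(c1) from inv_demo;  -- the scan must reconcile 2-column and 3-column partitions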
diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..b825fb3 100644
--- ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 create table inventory_txt
 (
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from inventory_txt;

-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0;

 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt;

-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1;

 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select * from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int;

-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2a;

 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int;

-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2b;

 -- Verify we do not vectorize when a partition column type is different.
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select * from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint;

-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file
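vector_partitioned_date_time.q, next, repeats one scan/sort/group-by pattern over tables partitioned by a DATE and then a TIMESTAMP column. Partition column values are not read from the ORC data; they come from the partition spec and are materialized into each batch, so these types need their own vectorized fill-in path. Illustrative query drawn from that file (the comment is an assumption about the mechanism, not captured output):

explain vectorization expression
select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
-- fl_date is supplied per partition, typically as a single repeating value for the whole batch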
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time.q ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..ee22c01 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
@@ -1,6 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;

 -- Exclude test on Windows due to space character being escaped in Hive paths on Windows.
 -- EXCLUDE_OS_WINDOWS
@@ -32,12 +32,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;

 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;

-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc group by fl_date;

 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -71,17 +71,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date;

 select * from flights_tiny_orc_partitioned_date;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;

 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;

-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;

 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -115,17 +115,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp;

 select * from flights_tiny_orc_partitioned_timestamp;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;

 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;

-explain
+explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;

 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
index c157df1..bd4931c 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
@@ -30,12 +30,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;

 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;

-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc group by fl_date;

 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -69,17 +69,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date;

 select * from flights_tiny_orc_partitioned_date;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;

 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;

-explain
+explain vectorization expression
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;

 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -113,17 +113,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl

 SET hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp;

 select * from flights_tiny_orc_partitioned_timestamp;

-explain
+explain vectorization expression
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;

 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;

-explain
+explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;

 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_reduce1.q ql/src/test/queries/clientpositive/vector_reduce1.q
index cfd803f..ce90491 100644
--- ql/src/test/queries/clientpositive/vector_reduce1.q
+++ ql/src/test/queries/clientpositive/vector_reduce1.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -42,7 +43,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select b from vectortab2korc order by b;

 select b from vectortab2korc order by b;
diff --git ql/src/test/queries/clientpositive/vector_reduce2.q ql/src/test/queries/clientpositive/vector_reduce2.q
index ab67132..80ad196 100644
--- ql/src/test/queries/clientpositive/vector_reduce2.q
+++ ql/src/test/queries/clientpositive/vector_reduce2.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -42,7 +43,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select s, i, s2 from vectortab2korc order by s, i, s2;

 select s, i, s2 from vectortab2korc order by s, i, s2;
diff --git ql/src/test/queries/clientpositive/vector_reduce3.q ql/src/test/queries/clientpositive/vector_reduce3.q
index bf8206f..e01ed26 100644
--- ql/src/test/queries/clientpositive/vector_reduce3.q
+++ ql/src/test/queries/clientpositive/vector_reduce3.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=true;
+set hive.fetch.task.conversion=none;

 -- SORT_QUERY_RESULTS

@@ -42,7 +43,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-explain
+explain vectorization expression
 select s from vectortab2korc order by s;

 select s from vectortab2korc order by s;
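The vector_reduce*.q trio above exercises reduce-side vectorization: hive.vectorized.execution.reducesink.new.enabled turns on the newer vectorized ReduceSink, so the ORDER BY keys are serialized by vectorized code and EXPLAIN VECTORIZATION reports the Map and Reducer vertices separately. Illustrative check (vectortab2korc is created in those tests):

SET hive.vectorized.execution.reducesink.new.enabled=true;
explain vectorization summary
select s from vectortab2korc order by s;
-- inspect both the Map and the Reducer vertex entries in the output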
diff --git ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
index 4a50150..bbd25ae 100644
--- ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
+++ ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
@@ -1,10 +1,12 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc WHERE cint is not null and cdouble is not null;

 SET hive.vectorized.execution.enabled=true;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test
 WHERE cdecimal1 is not null and cdecimal2 is not null
 GROUP BY cint, cdouble, cdecimal1, cdecimal2
diff --git ql/src/test/queries/clientpositive/vector_string_concat.q ql/src/test/queries/clientpositive/vector_string_concat.q
index f3a5965..b03c2a4 100644
--- ql/src/test/queries/clientpositive/vector_string_concat.q
+++ ql/src/test/queries/clientpositive/vector_string_concat.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;

 DROP TABLE over1k;
 DROP TABLE over1korc;
@@ -37,7 +38,7 @@ STORED AS ORC;

 INSERT INTO TABLE over1korc SELECT * FROM over1k;

-EXPLAIN SELECT s AS `string`,
+EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`,
        CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`,
        CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str`
 FROM over1korc LIMIT 20;
@@ -86,7 +87,7 @@ STORED AS ORC;

 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;

-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field`
 FROM vectortab2korc
 GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING))
diff --git ql/src/test/queries/clientpositive/vector_string_decimal.q ql/src/test/queries/clientpositive/vector_string_decimal.q
index e69cd77..186e339 100644
--- ql/src/test/queries/clientpositive/vector_string_decimal.q
+++ ql/src/test/queries/clientpositive/vector_string_decimal.q
@@ -13,7 +13,7 @@ insert overwrite table orc_decimal select id from staging;

 set hive.vectorized.execution.enabled=true;

-explain
+explain vectorization expression
 select * from orc_decimal where id in ('100000000', '200000000');

 select * from orc_decimal where id in ('100000000', '200000000');
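vector_string_decimal.q above compares a decimal column against string literals; Hive coerces the literals to the column's decimal type, and the vectorizer must then plan an IN filter over decimals. Equivalent spellings (the cast target below is an assumed precision and scale; the real one is whatever type the id column carries):

explain vectorization expression
select * from orc_decimal where id in ('100000000', '200000000');

select * from orc_decimal
where id in (cast('100000000' as decimal(18,0)), cast('200000000' as decimal(18,0)));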
where struct(`id`, `lineid`) IN ( struct('two',3), @@ -157,7 +158,7 @@ struct('nine',1), struct('ten',1) ); -explain +explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -189,7 +190,7 @@ create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5); -explain +explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -218,7 +219,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ); -explain +explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), diff --git ql/src/test/queries/clientpositive/vector_tablesample_rows.q ql/src/test/queries/clientpositive/vector_tablesample_rows.q index 4deb1c8..94b2f5b 100644 --- ql/src/test/queries/clientpositive/vector_tablesample_rows.q +++ ql/src/test/queries/clientpositive/vector_tablesample_rows.q @@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -explain +explain vectorization expression select 'key1', 'value1' from alltypesorc tablesample (1 rows); select 'key1', 'value1' from alltypesorc tablesample (1 rows); @@ -12,7 +12,7 @@ select 'key1', 'value1' from alltypesorc tablesample (1 rows); create table decimal_2 (t decimal(18,9)) stored as orc; -explain +explain vectorization expression insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows); @@ -25,12 +25,12 @@ drop table decimal_2; -- Dummy tables HIVE-13190 -explain +explain vectorization expression select count(1) from (select * from (Select 1 a) x order by x.a) y; select count(1) from (select * from (Select 1 a) x order by x.a) y; -explain +explain vectorization expression create temporary table dual as select 1; create temporary table dual as select 1; diff --git ql/src/test/queries/clientpositive/vector_udf1.q ql/src/test/queries/clientpositive/vector_udf1.q index 2fcc69b..e60b6fc 100644 --- ql/src/test/queries/clientpositive/vector_udf1.q +++ ql/src/test/queries/clientpositive/vector_udf1.q @@ -8,7 +8,7 @@ insert overwrite table varchar_udf_1 select key, value, key, value from src where key = '238' limit 1; -- UDFs with varchar support -explain +explain vectorization expression select concat(c1, c2), concat(c3, c4), @@ -21,7 +21,7 @@ select concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select upper(c2), upper(c4), @@ -34,7 +34,7 @@ select upper(c2) = upper(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select lower(c2), lower(c4), @@ -48,7 +48,7 @@ select from varchar_udf_1 limit 1; -- Scalar UDFs -explain +explain vectorization expression select ascii(c2), ascii(c4), @@ -61,7 +61,7 @@ select ascii(c2) = ascii(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select concat_ws('|', c1, c2), concat_ws('|', c3, c4), @@ -74,7 +74,7 @@ select concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), @@ -87,7 +87,7 @@ select decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1; -explain +explain vectorization expression select 
instr(c2, '_'), instr(c4, '_'), @@ -100,7 +100,7 @@ select instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1; -explain +explain vectorization expression select length(c2), length(c4), @@ -113,7 +113,7 @@ select length(c2) = length(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), @@ -126,7 +126,7 @@ select locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1; -explain +explain vectorization expression select lpad(c2, 15, ' '), lpad(c4, 15, ' '), @@ -139,7 +139,7 @@ select lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1; -explain +explain vectorization expression select ltrim(c2), ltrim(c4), @@ -152,7 +152,7 @@ select ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select c2 regexp 'val', c4 regexp 'val', @@ -165,7 +165,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -178,7 +178,7 @@ select regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -191,7 +191,7 @@ select regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1; -explain +explain vectorization expression select reverse(c2), reverse(c4), @@ -204,7 +204,7 @@ select reverse(c2) = reverse(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select rpad(c2, 15, ' '), rpad(c4, 15, ' '), @@ -217,7 +217,7 @@ select rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1; -explain +explain vectorization expression select rtrim(c2), rtrim(c4), @@ -230,7 +230,7 @@ select rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1; -explain +explain vectorization expression select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) @@ -241,7 +241,7 @@ select sentences(cast('See spot run. See jane run.' 
as varchar(50))) from varchar_udf_1 limit 1; -explain +explain vectorization expression select split(c2, '_'), split(c4, '_') @@ -252,7 +252,7 @@ select split(c4, '_') from varchar_udf_1 limit 1; -explain +explain vectorization expression select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') @@ -263,7 +263,7 @@ select str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1; -explain +explain vectorization expression select substr(c2, 1, 3), substr(c4, 1, 3), @@ -276,7 +276,7 @@ select substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1; -explain +explain vectorization expression select trim(c2), trim(c4), @@ -291,7 +291,7 @@ from varchar_udf_1 limit 1; -- Aggregate Functions -explain +explain vectorization expression select compute_stats(c2, 16), compute_stats(c4, 16) @@ -302,7 +302,7 @@ select compute_stats(c4, 16) from varchar_udf_1; -explain +explain vectorization expression select min(c2), min(c4) @@ -313,7 +313,7 @@ select min(c4) from varchar_udf_1; -explain +explain vectorization expression select max(c2), max(c4) diff --git ql/src/test/queries/clientpositive/vector_udf2.q ql/src/test/queries/clientpositive/vector_udf2.q index b926c4f..e62af6a 100644 --- ql/src/test/queries/clientpositive/vector_udf2.q +++ ql/src/test/queries/clientpositive/vector_udf2.q @@ -7,7 +7,7 @@ create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20) insert overwrite table varchar_udf_2 select key, value, key, value from src where key = '238' limit 1; -explain +explain vectorization expression select c1 LIKE '%38%', c2 LIKE 'val_%', diff --git ql/src/test/queries/clientpositive/vector_udf3.q ql/src/test/queries/clientpositive/vector_udf3.q index 8a4df79..bc3a5e1 100644 --- ql/src/test/queries/clientpositive/vector_udf3.q +++ ql/src/test/queries/clientpositive/vector_udf3.q @@ -1,10 +1,11 @@ ADD JAR ivy://org.apache.hive.hive-it-custom-udfs:udf-vectorized-badexample:+; +set hive.fetch.task.conversion=none; CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'; set hive.vectorized.execution.enabled=true; -EXPLAIN SELECT rot13(cstring1) from alltypesorc; +EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc; SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10; diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q index 32a74a4..80f84d8 100644 --- ql/src/test/queries/clientpositive/vector_varchar_4.q +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -1,5 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; drop table if exists vectortab2k; drop table if exists vectortab2korc; @@ -44,7 +45,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; drop table if exists varchar_lazy_binary_columnar; create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; -explain +explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; -- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; diff --git ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q 
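The tests above alternate between the bare `explain vectorization` form and `explain vectorization expression`. As used throughout this patch, the bare form reports whether the plan (and each operator) was vectorized, while adding the `expression` keyword also prints which vector expression class was chosen for each predicate and projection, which is what most of these tests assert on. A minimal sketch of the two forms against the standard alltypesorc test table (only the syntax is shown; the exact output wording is not reproduced here):

set hive.vectorized.execution.enabled=true;
set hive.fetch.task.conversion=none;

-- summary: is the Map/Reduce work vectorized at all?
explain vectorization
select count(*) from alltypesorc where cint > 0;

-- expression: additionally show the vector expression chosen per operator
explain vectorization expression
select count(*) from alltypesorc where cint > 0;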
diff --git ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
index ac0570e..285d2ac 100644
--- ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
+++ ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
@@ -37,15 +37,15 @@ create table varchar_join1_vc2_orc stored as orc as select * from varchar_join1_
 create table varchar_join1_str_orc stored as orc as select * from varchar_join1_str;
 -- Join varchar with same length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 -- Join varchar with different length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 -- Join varchar with string
-explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 drop table varchar_join1_vc1;
diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q
index acd6598..6f753a7 100644
--- ql/src/test/queries/clientpositive/vector_varchar_simple.q
+++ ql/src/test/queries/clientpositive/vector_varchar_simple.q
@@ -1,5 +1,7 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 drop table varchar_2;
 create table varchar_2 (
@@ -14,7 +16,7 @@ from src
 order by key asc
 limit 5;
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5;
@@ -30,7 +32,7 @@ from src
 order by key desc
 limit 5;
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5;
@@ -48,7 +50,7 @@ create table varchar_3 (
 field varchar(25)
 ) stored as orc;
-explain
+explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10;
 insert into table varchar_3 select cint from alltypesorc limit 10;
diff --git ql/src/test/queries/clientpositive/vector_when_case_null.q ql/src/test/queries/clientpositive/vector_when_case_null.q
index a423b60..4acd6dc 100644
--- ql/src/test/queries/clientpositive/vector_when_case_null.q
+++ ql/src/test/queries/clientpositive/vector_when_case_null.q
@@ -8,7 +8,7 @@ set hive.fetch.task.conversion=none;
 create table count_case_groupby (key string, bool boolean) STORED AS orc;
 insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL);
-explain
+explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
\ No newline at end of file
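A second change repeats through almost every file in this patch: `set hive.fetch.task.conversion=none;`. Fetch conversion lets Hive answer simple queries with a client-side fetch task instead of a real Map task, and, as a comment carried in vectorized_casts.q later in this patch puts it, vectorization is currently not supported in fetch tasks, so a converted query would silently skip the very code path these tests exist to exercise. Forcing conversion off keeps each query in a vectorizable task. A sketch of the difference being guarded against (`more` is the setting's other documented value; the query is illustrative):

-- with conversion enabled, a trivial projection may run as a plain fetch,
-- so EXPLAIN VECTORIZATION would have no vectorized work to report
set hive.fetch.task.conversion=more;
select cstring1 from alltypesorc limit 5;

-- with conversion off, the same query runs as a Map task and can vectorize
set hive.fetch.task.conversion=none;
select cstring1 from alltypesorc limit 5;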
diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q
index caa6a6a..00fb22a 100644
--- ql/src/test/queries/clientpositive/vectorization_0.q
+++ ql/src/test/queries/clientpositive/vectorization_0.q
@@ -1,11 +1,12 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
 MAX(ctinyint),
 COUNT(ctinyint),
@@ -20,7 +21,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -29,7 +30,7 @@ SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(ctinyint) as c1,
 variance(ctinyint),
@@ -54,7 +55,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
 MAX(cbigint),
 COUNT(cbigint),
@@ -69,7 +70,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -78,7 +79,7 @@ SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(cbigint) as c1,
 variance(cbigint),
@@ -103,7 +104,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
 MAX(cfloat),
 COUNT(cfloat),
@@ -118,7 +119,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -127,7 +128,7 @@ SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(cfloat) as c1,
 variance(cfloat),
@@ -152,7 +153,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
 (-(AVG(cbigint))),
 (-6432 + AVG(cbigint)),
diff --git ql/src/test/queries/clientpositive/vectorization_1.q ql/src/test/queries/clientpositive/vectorization_1.q
index 8fdcb27..f71218f 100644
--- ql/src/test/queries/clientpositive/vectorization_1.q
+++ ql/src/test/queries/clientpositive/vectorization_1.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_10.q ql/src/test/queries/clientpositive/vectorization_10.q
index 778250a..c5f4d43 100644
--- ql/src/test/queries/clientpositive/vectorization_10.q
+++ ql/src/test/queries/clientpositive/vectorization_10.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_11.q ql/src/test/queries/clientpositive/vectorization_11.q
index 4ac42ac..3830ea9 100644
--- ql/src/test/queries/clientpositive/vectorization_11.q
+++ ql/src/test/queries/clientpositive/vectorization_11.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_12.q ql/src/test/queries/clientpositive/vectorization_12.q
index bc31f3c..0728ba9 100644
--- ql/src/test/queries/clientpositive/vectorization_12.q
+++ ql/src/test/queries/clientpositive/vectorization_12.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_13.q ql/src/test/queries/clientpositive/vectorization_13.q
index 005808b..84ae994 100644
--- ql/src/test/queries/clientpositive/vectorization_13.q
+++ ql/src/test/queries/clientpositive/vectorization_13.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
 ctinyint,
 ctimestamp1,
@@ -71,7 +71,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5,
 LIMIT 40;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
 ctinyint,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_14.q ql/src/test/queries/clientpositive/vectorization_14.q
index 4796c18..825fd63 100644
--- ql/src/test/queries/clientpositive/vectorization_14.q
+++ ql/src/test/queries/clientpositive/vectorization_14.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT ctimestamp1,
 cfloat,
 cstring1,
diff --git ql/src/test/queries/clientpositive/vectorization_15.q ql/src/test/queries/clientpositive/vectorization_15.q
index 21ba8c8..5c48c58 100644
--- ql/src/test/queries/clientpositive/vectorization_15.q
+++ ql/src/test/queries/clientpositive/vectorization_15.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
 cboolean1,
 cdouble,
diff --git ql/src/test/queries/clientpositive/vectorization_16.q ql/src/test/queries/clientpositive/vectorization_16.q
index 11b709f..822c824 100644
--- ql/src/test/queries/clientpositive/vectorization_16.q
+++ ql/src/test/queries/clientpositive/vectorization_16.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
 cdouble,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_17.q ql/src/test/queries/clientpositive/vectorization_17.q
index 1306f6b..57cdc41 100644
--- ql/src/test/queries/clientpositive/vectorization_17.q
+++ ql/src/test/queries/clientpositive/vectorization_17.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
 cstring1,
 cint,
diff --git ql/src/test/queries/clientpositive/vectorization_2.q ql/src/test/queries/clientpositive/vectorization_2.q
index f232815..4941d1e 100644
--- ql/src/test/queries/clientpositive/vectorization_2.q
+++ ql/src/test/queries/clientpositive/vectorization_2.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_3.q ql/src/test/queries/clientpositive/vectorization_3.q
index d6e6580..2e0350a 100644
--- ql/src/test/queries/clientpositive/vectorization_3.q
+++ ql/src/test/queries/clientpositive/vectorization_3.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_4.q ql/src/test/queries/clientpositive/vectorization_4.q
index 3151cf0..ba603c8 100644
--- ql/src/test/queries/clientpositive/vectorization_4.q
+++ ql/src/test/queries/clientpositive/vectorization_4.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_5.q ql/src/test/queries/clientpositive/vectorization_5.q
index 773f4b3..e2d4d0a 100644
--- ql/src/test/queries/clientpositive/vectorization_5.q
+++ ql/src/test/queries/clientpositive/vectorization_5.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_6.q ql/src/test/queries/clientpositive/vectorization_6.q
index 803f592..f55a2fb 100644
--- ql/src/test/queries/clientpositive/vectorization_6.q
+++ ql/src/test/queries/clientpositive/vectorization_6.q
@@ -1,4 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vectorization_7.q ql/src/test/queries/clientpositive/vectorization_7.q
index 131f570..bf3a1c2 100644
--- ql/src/test/queries/clientpositive/vectorization_7.q
+++ ql/src/test/queries/clientpositive/vectorization_7.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
 cbigint,
 csmallint,
@@ -60,7 +60,7 @@ LIMIT 25;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
 cbigint,
 csmallint,
diff --git ql/src/test/queries/clientpositive/vectorization_8.q ql/src/test/queries/clientpositive/vectorization_8.q
index 2d357f1..d43db26 100644
--- ql/src/test/queries/clientpositive/vectorization_8.q
+++ ql/src/test/queries/clientpositive/vectorization_8.q
@@ -1,11 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
 cdouble,
 cboolean1,
@@ -56,7 +56,7 @@ LIMIT 20;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
 cdouble,
 cboolean1,
diff --git ql/src/test/queries/clientpositive/vectorization_9.q ql/src/test/queries/clientpositive/vectorization_9.q
index 11b709f..822c824 100644
--- ql/src/test/queries/clientpositive/vectorization_9.q
+++ ql/src/test/queries/clientpositive/vectorization_9.q
@@ -1,10 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
 cdouble,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_decimal_date.q ql/src/test/queries/clientpositive/vectorization_decimal_date.q
index 854ee20..29c025c 100644
--- ql/src/test/queries/clientpositive/vectorization_decimal_date.q
+++ ql/src/test/queries/clientpositive/vectorization_decimal_date.q
@@ -1,5 +1,7 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
+EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
 SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vectorization_div0.q ql/src/test/queries/clientpositive/vectorization_div0.q
index 05d81d0..025d457 100644
--- ql/src/test/queries/clientpositive/vectorization_div0.q
+++ ql/src/test/queries/clientpositive/vectorization_div0.q
@@ -1,16 +1,17 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
+set hive.fetch.task.conversion=none;
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100;
 select cdouble / 0.0 from alltypesorc limit 100;
 -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100;
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
@@ -18,7 +19,7 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit
 -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
 from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100;
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
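vectorization_div0.q depends on a trick its comments only hint at: alltypesorc contains no zero values, so the test manufactures zero denominators from values that do occur (subtracting 988888 from cbigint, or adding 200.0 to a cdouble range that includes -200.0). Every matching row then divides by exactly zero, exercising the vectorized division's handling of zero denominators across a whole batch. The same pattern in miniature (t and c are illustrative names, not from the test suite):

-- for rows where c = 42 the denominator is 0; vectorized division is
-- expected to yield NULL for those rows rather than raise an error
select c, 100 / (c - 42) from t where c between 0 and 100;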
diff --git ql/src/test/queries/clientpositive/vectorization_limit.q ql/src/test/queries/clientpositive/vectorization_limit.q
index 707f1ed..a4c54f2 100644
--- ql/src/test/queries/clientpositive/vectorization_limit.q
+++ ql/src/test/queries/clientpositive/vectorization_limit.q
@@ -1,7 +1,9 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
+set hive.fetch.task.conversion=none;
+
+explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
 SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
 set hive.optimize.reducededuplication.min.reducer=1;
@@ -9,31 +11,31 @@ set hive.limit.pushdown.memory.usage=0.3f;
 -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
 -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
 -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20;
 select distinct(ctinyint) from alltypesorc limit 20;
-explain
+explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
 -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
 -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
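vectorization_limit.q traces HIVE-3562's map-side limit pushdown through the vectorized path; its inline comments mark the reduce-sink shape each query exercises ("deduped RS", "distincts", "limit zero", "2MR (applied to last RS)"). The pushdown itself is governed by the two settings the test flips; a condensed sketch using the test's own values:

set hive.optimize.reducededuplication.min.reducer=1;
set hive.limit.pushdown.memory.usage=0.3f;
-- with pushdown active, the map side can keep only the top rows for
-- ORDER BY ... LIMIT instead of shipping every row to the reducer
explain vectorization expression
select ctinyint, cdouble, csmallint from alltypesorc
where ctinyint is not null order by ctinyint, cdouble limit 20;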
diff --git ql/src/test/queries/clientpositive/vectorization_nested_udf.q ql/src/test/queries/clientpositive/vectorization_nested_udf.q
index bb50f9b..da8f99c 100644
--- ql/src/test/queries/clientpositive/vectorization_nested_udf.q
+++ ql/src/test/queries/clientpositive/vectorization_nested_udf.q
@@ -1,3 +1,5 @@
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 SELECT SUM(abs(ctinyint)) from alltypesorc;
diff --git ql/src/test/queries/clientpositive/vectorization_not.q ql/src/test/queries/clientpositive/vectorization_not.q
index 7ac507b..aa691ab 100644
--- ql/src/test/queries/clientpositive/vectorization_not.q
+++ ql/src/test/queries/clientpositive/vectorization_not.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 SELECT AVG(cbigint),
 (-(AVG(cbigint))),
 (-6432 + AVG(cbigint)),
diff --git ql/src/test/queries/clientpositive/vectorization_offset_limit.q ql/src/test/queries/clientpositive/vectorization_offset_limit.q
index 3d01154..97e1a05 100644
--- ql/src/test/queries/clientpositive/vectorization_offset_limit.q
+++ ql/src/test/queries/clientpositive/vectorization_offset_limit.q
@@ -1,10 +1,11 @@
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=none;
-explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
 SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vectorization_part.q ql/src/test/queries/clientpositive/vectorization_part.q
index 8d677db..9f4fc6e 100644
--- ql/src/test/queries/clientpositive/vectorization_part.q
+++ ql/src/test/queries/clientpositive/vectorization_part.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds string) STORED AS ORC;
 insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc limit 100;
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc limit 100;
diff --git ql/src/test/queries/clientpositive/vectorization_part_project.q ql/src/test/queries/clientpositive/vectorization_part_project.q
index 3a48f20..d0dcb6f 100644
--- ql/src/test/queries/clientpositive/vectorization_part_project.q
+++ ql/src/test/queries/clientpositive/vectorization_part_project.q
@@ -1,9 +1,11 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds string) STORED AS ORC;
 insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc order by ctinyint, cint, cbigint limit 100;
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc order by ctinyint, cint, cbigint limit 100;
-explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
+explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
 select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10;
diff --git ql/src/test/queries/clientpositive/vectorization_part_varchar.q ql/src/test/queries/clientpositive/vectorization_part_varchar.q
index d371de8..28646b9 100644
--- ql/src/test/queries/clientpositive/vectorization_part_varchar.q
+++ ql/src/test/queries/clientpositive/vectorization_part_varchar.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
 CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC;
 insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100;
 insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100;
diff --git ql/src/test/queries/clientpositive/vectorization_pushdown.q ql/src/test/queries/clientpositive/vectorization_pushdown.q
index b33cfa7..8acb193 100644
--- ql/src/test/queries/clientpositive/vectorization_pushdown.q
+++ ql/src/test/queries/clientpositive/vectorization_pushdown.q
@@ -2,5 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.optimize.index.filter=true;
-explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
+set hive.fetch.task.conversion=none;
+
+explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
 SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble;
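vectorization_pushdown.q additionally sets hive.optimize.index.filter=true, which enables predicate pushdown into the storage layer; for ORC, column-versus-constant predicates can become search arguments that skip whole row groups via their min/max statistics (the test's own column-versus-column predicate `cbigint < cdouble` is not SARG-able, so the test mainly proves that pushdown and vectorization coexist). A sketch with a constant predicate that pushdown could actually use (this query is illustrative, not from the test):

SET hive.vectorized.execution.enabled=true;
SET hive.optimize.index.filter=true;
explain vectorization
SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint > 0;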
diff --git ql/src/test/queries/clientpositive/vectorization_short_regress.q ql/src/test/queries/clientpositive/vectorization_short_regress.q
index 114a3e2..03e4bbc 100644
--- ql/src/test/queries/clientpositive/vectorization_short_regress.q
+++ ql/src/test/queries/clientpositive/vectorization_short_regress.q
@@ -2,7 +2,7 @@ set hive.compute.query.using.stats=false;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -36,7 +36,8 @@ set hive.fetch.task.conversion=minimal;
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
 (AVG(cint) + -3728),
 (-((AVG(cint) + -3728))),
 (-((-((AVG(cint) + -3728))))),
@@ -112,7 +113,8 @@ WHERE ((762 = cbigint)
 -- ArithmeticOps: Divide, Multiply, Remainder, Subtract
 -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT MAX(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT MAX(cint),
 (MAX(cint) / -3728),
 (MAX(cint) * -3728),
 VAR_POP(cbigint),
@@ -182,7 +184,8 @@ WHERE (((cbigint <= 197)
 -- ArithmeticOps: Subtract, Remainder, Multiply, Add
 -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT VAR_POP(cbigint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT VAR_POP(cbigint),
 (-(VAR_POP(cbigint))),
 (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))),
 COUNT(*),
@@ -250,7 +253,8 @@ WHERE ((ctimestamp1 = ctimestamp2)
 -- ArithmeticOps: Add, Divide, Remainder, Multiply
 -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(ctinyint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
 (AVG(ctinyint) + 6981),
 ((AVG(ctinyint) + 6981) + AVG(ctinyint)),
 MAX(cbigint),
@@ -298,7 +302,8 @@ WHERE (((ctimestamp2 <= ctimestamp1)
 -- ArithmeticOps: Multiply, Subtract, Add, Divide
 -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
 cdouble,
 ctimestamp2,
 cstring1,
@@ -376,7 +381,8 @@ LIMIT 50;
 -- ArithmeticOps: Divide, Remainder, Subtract, Multiply
 -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
 cbigint,
 cstring1,
 cboolean1,
@@ -451,7 +457,8 @@ LIMIT 25;
 -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder
 -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
 cstring1,
 cboolean2,
 ctimestamp2,
@@ -524,7 +531,8 @@ LIMIT 75;
 -- ArithmeticOps: Divide, Subtract, Multiply, Remainder
 -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
 cstring2,
 cdouble,
 cfloat,
@@ -583,7 +591,8 @@ LIMIT 45;
 -- ArithmeticOps: Remainder, Divide, Subtract
 -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT csmallint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
 (csmallint % -75) as c1,
 STDDEV_SAMP(csmallint) as c2,
 (-1.389 / csmallint) as c3,
@@ -628,7 +637,8 @@ LIMIT 20;
 -- ArithmeticOps: Multiply, Add, Subtract, Remainder
 -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT cdouble,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
 VAR_SAMP(cdouble),
 (2563.58 * VAR_SAMP(cdouble)),
 (-(VAR_SAMP(cdouble))),
@@ -686,7 +696,8 @@ ORDER BY cdouble;
 -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder
 -- FilterOps: NotEqual, LessThan, Like, Equal, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
 cstring1,
 STDDEV_POP(cint) as c1,
 (STDDEV_POP(cint) * 10.175) as c2,
@@ -801,7 +812,8 @@ LIMIT 50;
 -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply
 -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT cboolean1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cboolean1,
 MAX(cfloat),
 (-(MAX(cfloat))),
 (-26.28 / MAX(cfloat)),
@@ -883,12 +895,12 @@ ORDER BY cboolean1;
 -- These tests verify COUNT on empty or null columns work correctly.
 create table test_count(i int) stored as orc;
-explain
+explain vectorization expression
 select count(*) from test_count;
 select count(*) from test_count;
-explain
+explain vectorization expression
 select count(i) from test_count;
 select count(i) from test_count;
@@ -911,32 +923,32 @@ insert into table alltypesnull select null, null, null, null, null, null, null,
 create table alltypesnullorc stored as orc as select * from alltypesnull;
-explain
+explain vectorization expression
 select count(*) from alltypesnullorc;
 select count(*) from alltypesnullorc;
-explain
+explain vectorization expression
 select count(ctinyint) from alltypesnullorc;
 select count(ctinyint) from alltypesnullorc;
-explain
+explain vectorization expression
 select count(cint) from alltypesnullorc;
 select count(cint) from alltypesnullorc;
-explain
+explain vectorization expression
 select count(cfloat) from alltypesnullorc;
 select count(cfloat) from alltypesnullorc;
-explain
+explain vectorization expression
 select count(cstring1) from alltypesnullorc;
 select count(cstring1) from alltypesnullorc;
-explain
+explain vectorization expression
 select count(cboolean1) from alltypesnullorc;
 select count(cboolean1) from alltypesnullorc;
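The count queries that close vectorization_short_regress.q pin down NULL semantics the vectorized aggregates must share with row mode: count(*) counts rows whether or not any column is NULL, while count(col) counts only rows where col is non-NULL, so over the all-null alltypesnullorc every count(c...) must return 0 even though count(*) does not. The distinction in one line (t is an illustrative table holding rows (1), (NULL), (2) in column i):

-- returns 3, 2: count(*) keeps the NULL row, count(i) drops it
select count(*), count(i) from t;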
diff --git ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
index 022ce2e..191d8c6 100644
--- ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
+++ ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
@@ -1,4 +1,6 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 create table vsmb_bucket_1(key int, value string)
 CLUSTERED BY (key)
 SORTED BY (key) INTO 1 BUCKETS
@@ -28,11 +30,11 @@ set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
 set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-explain
+explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
@@ -41,6 +43,6 @@ select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q
index e74bf82..2efacb4 100644
--- ql/src/test/queries/clientpositive/vectorized_case.q
+++ ql/src/test/queries/clientpositive/vectorized_case.q
@@ -2,7 +2,7 @@ set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.vectorized.execution.enabled = true
 ;
-explain
+explain vectorization expression
 select
 csmallint,
 case
@@ -37,7 +37,7 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 ;
-explain
+explain vectorization expression
 select
 csmallint,
 case
diff --git ql/src/test/queries/clientpositive/vectorized_casts.q ql/src/test/queries/clientpositive/vectorized_casts.q
index 0880e29..a32c150 100644
--- ql/src/test/queries/clientpositive/vectorized_casts.q
+++ ql/src/test/queries/clientpositive/vectorized_casts.q
@@ -8,7 +8,7 @@ SET hive.vectorized.execution.enabled = true;
 -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
 -- Test type casting in vectorized mode to verify end-to-end functionality.
-explain
+explain vectorization
 select
 -- to boolean
 cast (ctinyint as boolean)
diff --git ql/src/test/queries/clientpositive/vectorized_context.q ql/src/test/queries/clientpositive/vectorized_context.q
index 657270e..0558bc0 100644
--- ql/src/test/queries/clientpositive/vectorized_context.q
+++ ql/src/test/queries/clientpositive/vectorized_context.q
@@ -1,5 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
 create table store(s_store_sk int, s_city string)
 stored as orc;
 insert overwrite table store
@@ -26,7 +28,7 @@ set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
-explain
+explain vectorization
 select store.s_city, ss_net_profit
 from store_sales
 JOIN store ON store_sales.ss_store_sk = store.s_store_sk
diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q
index 7d7b1cf..b9a7ecf 100644
--- ql/src/test/queries/clientpositive/vectorized_date_funcs.q
+++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q
@@ -1,6 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
+set hive.fetch.task.conversion=none;
 -- SORT_QUERY_RESULTS
@@ -24,7 +25,7 @@ INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date,
 SELECT * FROM date_udf_flight_orc;
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
 to_unix_timestamp(fl_time),
 year(fl_time),
 month(fl_time),
@@ -54,7 +55,7 @@ SELECT
 datediff(fl_time, "2000-01-01")
 FROM date_udf_flight_orc;
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
 to_unix_timestamp(fl_date),
 year(fl_date),
 month(fl_date),
@@ -84,7 +85,7 @@ SELECT
 datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc;
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
 year(fl_time) = year(fl_date),
 month(fl_time) = month(fl_date),
 day(fl_time) = day(fl_date),
@@ -113,7 +114,7 @@ SELECT
 datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc;
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
 fl_date,
 to_date(date_add(fl_date, 2)),
 to_date(date_sub(fl_date, 2)),
@@ -134,7 +135,7 @@ FROM date_udf_flight_orc LIMIT 10;
 -- Test extracting the date part of expression that includes time
 SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION SELECT
 min(fl_date) AS c1,
 max(fl_date),
 count(fl_date),
diff --git ql/src/test/queries/clientpositive/vectorized_distinct_gby.q ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
index 6900dc0..4339a5f 100644
--- ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
+++ ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
@@ -1,14 +1,15 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 SET hive.map.groupby.sorted=true;
 create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc;
 insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c;
-explain select sum(distinct a), count(distinct a) from dtest;
+explain vectorization select sum(distinct a), count(distinct a) from dtest;
 select sum(distinct a), count(distinct a) from dtest;
-explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
+explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
 select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
diff --git ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 2d3788d..d2ded71 100644
--- ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 select distinct ds from srcpart;
 select distinct hr from srcpart;
-EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
+EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
 create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds;
 create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr;
 create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr;
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08';
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
@@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = 'I DONT EXIST';
 -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where cast(hr as string) = 11;
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
 select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
 -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
 select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
-EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 set hive.auto.convert.join=true;
@@ -119,60 +120,60 @@ set hive.auto.convert.join.noconditionaltask = true;
 set hive.auto.convert.join.noconditionaltask.size = 10000000;
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
 select count(*) from srcpart where hr = 11;
 set hive.stats.fetch.column.stats=false;
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 set hive.stats.fetch.column.stats=true;
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join
srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); @@ -185,7 +186,7 @@ set hive.vectorized.execution.enabled=false; set hive.exec.max.dynamic.partitions=1000; insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; -EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin.q ql/src/test/queries/clientpositive/vectorized_mapjoin.q index 6500d41..138c133 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin.q @@ -4,10 +4,11 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin2.q ql/src/test/queries/clientpositive/vectorized_mapjoin2.q index 137acbc..d259547 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin2.q +++ 
ql/src/test/queries/clientpositive/vectorized_mapjoin2.q @@ -15,7 +15,7 @@ create temporary table y (b int) stored as orc; insert into x values(1); insert into y values(1); -explain +explain vectorization expression select count(1) from x, y where a = b; select count(1) from x, y where a = b; diff --git ql/src/test/queries/clientpositive/vectorized_math_funcs.q ql/src/test/queries/clientpositive/vectorized_math_funcs.q index d79fcce..6875909 100644 --- ql/src/test/queries/clientpositive/vectorized_math_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_math_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) diff --git ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q index 4332898..5b07c9f 100644 --- ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q @@ -4,9 +4,10 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q index da138e0..e6ebdaa 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet.q +++ ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -21,7 +21,7 @@ insert overwrite table alltypes_parquet SET hive.vectorized.execution.enabled=true; -explain select * +explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10; @@ -30,7 +30,7 @@ select * where cint = 528534767 limit 10; -explain select ctinyint, +explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), diff --git ql/src/test/queries/clientpositive/vectorized_parquet_types.q ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 297c5af..68761b6 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -48,19 +48,19 @@ SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, unhex(cbinary), cdecimal FROM parquet_types_staging; -- select -explain +explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; -explain +explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM 
parquet_types; SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; -explain +explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index 64082e9..e648320 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,4 +1,5 @@ SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -42,7 +43,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -63,7 +64,7 @@ from noop(on part_orc -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -80,7 +81,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -93,7 +94,7 @@ order by p_name); -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +115,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -137,7 +138,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,7 +163,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -177,7 +178,7 @@ order by p_name -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -192,7 +193,7 @@ order by p_name -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -207,7 +208,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -226,7 +227,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -247,7 +248,7 @@ order by p_name) -- 12. 
testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -268,7 +269,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -295,7 +296,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -322,7 +323,7 @@ order by p_name -- 15. testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -341,7 +342,7 @@ sum(p_retailprice) as s from part_orc group by p_mfgr, p_brand; -explain extended +explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -375,7 +376,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain extended +explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -412,7 +413,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -447,7 +448,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -482,7 +483,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -514,7 +515,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -549,7 +550,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -582,7 +583,7 @@ from noop(on -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ql/src/test/queries/clientpositive/vectorized_shufflejoin.q index f57d062..9227de0 100644 --- ql/src/test/queries/clientpositive/vectorized_shufflejoin.q +++ ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -2,10 +2,11 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=false; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT; diff --git ql/src/test/queries/clientpositive/vectorized_string_funcs.q ql/src/test/queries/clientpositive/vectorized_string_funcs.q index d04a3c3..ee95c0b 100644 --- ql/src/test/queries/clientpositive/vectorized_string_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_string_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) diff --git ql/src/test/queries/clientpositive/vectorized_timestamp.q ql/src/test/queries/clientpositive/vectorized_timestamp.q index 2784b7a..ceee2ee 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -6,23 +6,23 @@ CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SET hive.vectorized.execution.enabled = true; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index aaf85fc..afbc18a 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.fetch.task.conversion=none; -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. 
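-- [editor's note] Aside on the `set hive.fetch.task.conversion=none;` lines
-- added to several of these tests: a plausible reading is that a simple
-- projection can otherwise be answered by a fetch task with no map phase at
-- all, leaving no vectorized operator tree for EXPLAIN VECTORIZATION to
-- report on. Hypothetical illustration against a table these tests share:
set hive.fetch.task.conversion=none;
explain vectorization
select cstring1 from alltypesorc limit 10;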
@@ -23,7 +24,7 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -49,7 +50,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -75,7 +76,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -103,7 +104,7 @@ FROM alltypesorc_string ORDER BY c1; -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -129,7 +130,7 @@ SELECT FROM alltypesorc_wrong ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), @@ -144,7 +145,7 @@ SELECT FROM alltypesorc_string; -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; @@ -152,7 +153,7 @@ SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q index 15964c9..e6e6d5d 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q @@ -1,8 +1,9 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled = true; SET hive.int.timestamp.conversion.in.seconds=false; +set hive.fetch.task.conversion=none; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -40,7 +41,7 @@ where cbigint % 250 = 0; SET hive.int.timestamp.conversion.in.seconds=true; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out index d8032d8..5b74e0b 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out @@ -83,25 +83,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain 
vectorization detail select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=101) - default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_permute_select + Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_permute_select @@ -202,25 +250,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
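-- [editor's note] Reading the golden output above: the PLAN VECTORIZATION
-- header reports whether vectorization was considered (`enabled`) and which
-- preconditions held (`enabledConditionsMet`). A hedged sketch of flipping
-- the one condition this output lists, reusing the same query:
set hive.vectorized.execution.enabled=false;
explain vectorization detail
select insert_num,part,a,b from part_add_int_permute_select;
set hive.vectorized.execution.enabled=true;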
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=145) - default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_string_permute_select + Statistics: Num rows: 2 Data size: 290 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_string_permute_select @@ -383,25 +479,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=426) - default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_double + Statistics: Num rows: 5 Data size: 2130 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -501,25 +645,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
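-- [editor's note] The rowBatchContext block in the plans above appears to be
-- what the DETAIL level adds over the shorter forms: the per-scan batch layout
-- (dataColumnCount, includeColumns, dataColumns, partitionColumns). Sketch
-- comparing the two levels on the query from this output:
explain vectorization
select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
explain vectorization detail
select insert_num,part,c1,c2,c3,b from part_change_string_group_double;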
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=586) - default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_date_group_string_group_date_timestamp + Statistics: Num rows: 6 Data size: 3521 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -696,25 +888,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=483) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 2903 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -873,25 +1113,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
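-- [editor's note] In the Map Vectorization summaries above, `allNative: false`
-- lines up with the File Sink reporting `native: false` while the TableScan
-- and Select are native; `usesVectorUDFAdaptor: false` means no row-mode UDF
-- had to be bridged into the vectorized pipeline. Hedged probe on the same
-- table to surface just those summary flags:
explain vectorization
select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group;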
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 4540 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=756) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -1038,25 +1326,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: 
Lineage: part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 6682 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=1113) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1237,25 +1573,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=236) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 1419 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 20
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+                    dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint
 PREHOOK: type: QUERY
@@ -1366,25 +1750,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA
 POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_lower_to_higher_numeric_group_decimal_to_float
+                  Statistics: Num rows: 6 Data size: 1523 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 5, 1, 2, 3, 4]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-          TableScan [TS_0] (rows=6 width=253)
-            default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
index 0a01b8c..1511298 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
@@ -149,25 +149,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL
 POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ]
 complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 llap
-      File Output Operator [FS_2]
-        Select Operator [SEL_1] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3"]
-          TableScan [TS_0] (rows=6 width=789)
-            default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_struct1
+                  Statistics: Num rows: 6 Data size: 4734 Basic stats: COMPLETE Column stats: PARTIAL
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1
 PREHOOK: type: QUERY
@@ -413,25 +443,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [
 POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ]
 POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ]
 complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,b,s2 from part_add_various_various_struct2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,b,s2 from part_add_various_various_struct2
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 llap
-      File Output Operator [FS_2]
-        Select Operator [SEL_1] (rows=8 width=4)
-          Output:["_col0","_col1","_col2","_col3"]
-          TableScan [TS_0] (rows=8 width=614)
-            default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_add_various_various_struct2
+                  Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2
 PREHOOK: type: QUERY
@@ -601,25 +661,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL
 POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ]
 POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ]
 complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,b,s3 from part_add_to_various_various_struct4
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,b,s3 from part_add_to_various_various_struct4
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_add_to_various_various_struct4
+                  Statistics: Num rows: 4 Data size: 1172 Basic stats: COMPLETE Column stats: PARTIAL
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported
+                vectorized: false
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 llap
-      File Output Operator [FS_2]
-        Select Operator [SEL_1] (rows=4 width=4)
-          Output:["_col0","_col1","_col2","_col3"]
-          TableScan [TS_0] (rows=4 width=293)
-            default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out
index d240f82..c89be06 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out
@@ -278,25 +278,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=10 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"]
-          TableScan [TS_0] (rows=10 width=1168)
-            default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_boolean_to_bigint
+                  Statistics: Num rows: 10 Data size: 11688 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 55
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
+                    dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
@@ -493,25 +541,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"]
-          TableScan [TS_0] (rows=6 width=1382)
-            default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_decimal_to_double
+                  Statistics: Num rows: 6 Data size: 8295 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 35, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 35
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
+                    dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double
 PREHOOK: type: QUERY
@@ -624,25 +720,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI
 POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
-          TableScan [TS_0] (rows=6 width=494)
-            default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_timestamp
+                  Statistics: Num rows: 6 Data size: 2965 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 14
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+                    dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp
 PREHOOK: type: QUERY
@@ -739,25 +883,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE
 POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ]
 insert_num date1 date1 date1 date1 _c5
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_various_various_date
+                  Statistics: Num rows: 6 Data size: 2444 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    includeColumns: [0, 1, 2, 3, 4, 5]
+                    dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=6 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-          TableScan [TS_0] (rows=6 width=407)
-            default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date
 PREHOOK: type: QUERY
@@ -935,25 +1127,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S
 POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ]
 POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ]
 same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 same_type1_c_txt.c3 same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part_change_same_type_different_params
+                  Statistics: Num rows: 13 Data size: 8736 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Select Operator
+                    expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7]
+                    Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 8
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                    dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string
+                    partitionColumnCount: 1
+                    partitionColumns: part:int
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=13 width=4)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-          TableScan [TS_0] (rows=13 width=672)
-            default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out
index 5e99743..5ef34e5 100644
--- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out
+++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out
@@ -83,25 +83,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1
 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=5 width=99)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=5 width=99)
-            default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_permute_select
+                  Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
 select insert_num,a,b from table_add_int_permute_select
@@ -208,25 +255,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm
 POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_string_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=5 width=99)
-          Output:["_col0","_col1","_col2"]
-          TableScan [TS_0] (rows=5 width=99)
-            default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_add_int_string_permute_select
+                  Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), a (type: int), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2]
+                    Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2]
+                    dataColumns: insert_num:int, a:int, b:string, c:int, d:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
 select insert_num,a,b from table_add_int_string_permute_select
@@ -403,25 +497,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_
 POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,b from table_change_string_group_double
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=5 width=422)
-          Output:["_col0","_col1","_col2","_col3","_col4"]
-          TableScan [TS_0] (rows=5 width=422)
-            default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_string_group_double
+                  Statistics: Num rows: 5 Data size: 2110 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4]
+                    Statistics: Num rows: 5 Data size: 2110 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 2110 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string
+                    partitionColumnCount: 0
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double
 PREHOOK: type: QUERY
@@ -691,25 +832,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_multi_ints_string_group
+                  Statistics: Num rows: 5 Data size: 820 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    Statistics: Num rows: 5 Data size: 820 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 820 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 22
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                    dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string
+                    partitionColumnCount: 0
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 1 vectorized, llap
-      File Output Operator [FS_4]
-        Select Operator [SEL_3] (rows=5 width=164)
-          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"]
-          TableScan [TS_0] (rows=5 width=164)
-            default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group
 PREHOOK: type: QUERY
@@ -864,25 +1052,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
 POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: table_change_numeric_group_string_group_floating_string_group
+                  Statistics: Num rows: 5 Data size: 2940 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                  Select Operator
+                    expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+                    Statistics: Num rows: 5 Data size: 2940 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 5 Data size: 2940 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 17
16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=588) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=5 width=588) - default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out index 437770d..1d4163c 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out @@ -87,25 +87,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
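The block added below is the detailed plan form that replaces the one-line CBO summary. As a minimal sketch (assuming the part_add_int_permute_select table created earlier in this test still exists), the same output can be elicited from a Hive session with:

set hive.vectorized.execution.enabled=true;
explain vectorization detail
select insert_num,part,a,b from part_add_int_permute_select;

The enabledConditionsMet entry at the top of PLAN VECTORIZATION simply echoes that this property was on when the plan was compiled.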
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=16) - default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_permute_select + Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_permute_select @@ -206,25 +254,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
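In the plan that follows, rowBatchContext reports dataColumnCount: 5 but includeColumns: [0, 1, 2]: the vectorized reader materializes only the columns the query actually touches. A sketch of the correspondence, with the index assignments taken from the dataColumns order shown below:

-- data columns: insert_num=0, a=1, b=2, c=3, d=4; part is the partition column
-- the query reads only insert_num, a and b, hence includeColumns = [0, 1, 2]
explain vectorization detail
select insert_num,part,a,b from part_add_int_string_permute_select;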
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=19) - default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_string_permute_select + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_string_permute_select @@ -387,25 +483,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
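These tables are stored as text, so the Map Vectorization summary below cites hive.vectorized.use.vector.serde.deserialize as the condition that allowed TextInputFormat splits to be deserialized directly into vectorized row batches. A minimal sketch of the relevant session settings, assuming the test table exists:

set hive.vectorized.execution.enabled=true;
set hive.vectorized.use.vector.serde.deserialize=true;
explain vectorization detail
select insert_num,part,c1,c2,c3,b from part_change_string_group_double;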
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=56) - default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_double + Statistics: Num rows: 5 Data size: 284 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -505,25 +649,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
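Note that allNative: false in the summaries below does not mean vectorization failed; it reflects the file sink, whose File Sink Vectorization entry shows native: false, while the scan and select above it stay native. A commented sketch of the query that produces this plan:

-- allNative is false only because VectorFileSinkOperator runs through the
-- row-mode writer (native: false); TableScan and Select remain native
explain vectorization detail
select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp;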
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=154) - default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_date_group_string_group_date_timestamp + Statistics: Num rows: 6 Data size: 926 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -700,25 +892,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=153) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 918 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -877,25 +1117,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
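The Select Vectorization entry in the next plan shows how projection reorders physical columns into the query's output order: the partition column part is appended after the 12 data columns, so it appears as index 12 spliced into the _col1 slot. A sketch of the mapping, with indexes taken from the dataColumns list shown below:

-- data columns: insert_num=0, c1..c10=1..10, b=11; partition column part=12
-- output order insert_num, part, c1..c10, b maps to [0, 12, 1, 2, ..., 11]
explain vectorization detail
select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string;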
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 1386 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=231) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -1042,25 +1330,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: Lineage: 
part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 421 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=70) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1241,25 +1577,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=143) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 20 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1370,25 +1754,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
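Lower-to-higher numeric schema changes keep the map vertex vectorized: in the plan below, columns originally written as decimal read back as float and double (c1 float, c2 double, c3 double), and the widening happens inside the vectorized deserializer rather than through an adaptor, hence usesVectorUDFAdaptor: false. A sketch, assuming the test table:

explain vectorization detail
select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float;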
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_decimal_to_float + Statistics: Num rows: 6 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=6 width=71) - default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index e35222b..aa09b53 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -153,25 +153,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ] complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail 
select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=6 width=155) - default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_struct1 + Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY @@ -417,25 +447,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ] complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
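Complex types are the one case in this file that falls back to row execution: the plans below carry a notVectorizedReason explaining that a struct column in the SELECT is not supported, and Execution mode drops to plain llap with vectorized: false. The stage-level header still reports enabled: true; only this map vertex is rejected. Sketch:

explain vectorization detail
select insert_num,part,b,s2 from part_add_various_various_struct2;
-- Map Vectorization reports:
--   notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported
--   vectorized: false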
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=8 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=8 width=117) - default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_various_various_struct2 + Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY @@ -605,25 +665,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ] complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_to_various_various_struct4 + Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + vectorized: false -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=4 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=4 width=88) - default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out index fb38687..e8b5c71 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out @@ -282,25 +282,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=470) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 4707 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 55 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY @@ -497,25 +545,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part= POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ] POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 
decimal1 decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] - TableScan [TS_0] (rows=6 width=425) - default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_decimal_to_double + Statistics: Num rows: 6 Data size: 2551 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 35, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 35 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY @@ -628,25 +724,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - TableScan [TS_0] (rows=6 width=145) - default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_timestamp + Statistics: Num rows: 6 Data size: 870 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY @@ -743,25 +887,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ] POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num date1 date1 date1 date1 _c5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_date + Statistics: Num rows: 6 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - TableScan [TS_0] (rows=6 width=62) - default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY @@ -939,25 +1131,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ] POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 
same_type1_c_txt.c3 same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_same_type_different_params + Statistics: Num rows: 13 Data size: 1311 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7] + dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=13 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - TableScan [TS_0] (rows=13 width=100) - default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out index b137894..85f858b 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out @@ -87,25 
+87,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=5 width=20) - default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_permute_select + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,a,b from table_add_int_permute_select @@ -212,25 +259,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: 
query: explain +PREHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=5 width=20) - default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_string_permute_select + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,a,b from table_add_int_string_permute_select @@ -407,25 +501,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_ POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
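[Note: the hunks above and below all make the same substitution — the condensed CBO summary line is replaced by the full EXPLAIN VECTORIZATION DETAIL plan. For reference, a hedged sketch of fetching the same plan text outside the qtest harness, over plain JDBC; the connection URL, credentials, and hive-jdbc-on-the-classpath setup are assumptions for illustration, not part of this patch.]

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Sketch only: requires the hive-jdbc driver jar on the classpath
// (JDBC 4 auto-registers it) and a reachable HiveServer2.
public class ExplainVectorizationSketch {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:hive2://localhost:10000/default", "hive", "");
             Statement stmt = conn.createStatement()) {
            stmt.execute("SET hive.vectorized.execution.enabled=true");
            // Each row of the result set is one line of the plan text,
            // starting with "PLAN VECTORIZATION:" as in the output above.
            try (ResultSet rs = stmt.executeQuery(
                    "EXPLAIN VECTORIZATION DETAIL "
                    + "SELECT insert_num, c1, c2, c3, b "
                    + "FROM table_change_string_group_double")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }
}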
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=52) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=5 width=52) - default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_string_group_double + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY @@ -695,25 +836,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by 
CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=151) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=5 width=151) - default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: 
query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -868,25 +1056,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: 
insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=250) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=5 width=250) - default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out index 98d6303..b198354 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part.q.out @@ -87,25 +87,73 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=16) - default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_permute_select + Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_permute_select @@ -206,25 +254,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=2 width=19) - default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_int_string_permute_select + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_string_permute_select @@ -387,25 +483,73 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=56) - default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_double + Statistics: Num rows: 5 Data size: 284 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -505,25 +649,73 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
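[Note on the rowBatchContext blocks: in plans like part_change_string_group_double just above (dataColumnCount: 5, partitionColumnCount: 1), the partition column part surfaces as batch index 5 in projectedOutputColumns: [0, 5, 1, 2, 3, 4]. A minimal sketch of that layout rule, using hypothetical names rather than Hive's actual classes:]

// Data columns occupy batch indices 0..dataColumnCount-1; partition
// columns are appended after them. That is why "part" is index 5 when
// there are 5 data columns (insert_num, c1, c2, c3, b).
public class RowBatchLayoutSketch {
    static int partitionColumnIndex(int dataColumnCount, int partitionOrdinal) {
        // The first partition column sits right after the last data column.
        return dataColumnCount + partitionOrdinal;
    }

    public static void main(String[] args) {
        String[] dataColumns = {"insert_num", "c1", "c2", "c3", "b"};
        int partIndex = partitionColumnIndex(dataColumns.length, 0);
        System.out.println("part is batch column " + partIndex); // prints 5
    }
}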
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=154) - default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_date_group_string_group_date_timestamp + Statistics: Num rows: 6 Data size: 926 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -700,25 +892,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, 
comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=153) - default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 6 Data size: 918 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -877,25 +1117,73 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 6 Data size: 1386 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=231) - default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -1042,25 +1330,73 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: Lineage: 
part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_string_group_string_group_string + Statistics: Num rows: 6 Data size: 421 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=70) - 
default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1241,25 +1577,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - TableScan [TS_0] (rows=6 width=143) - default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint + Statistics: Num rows: 6 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 20 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1370,25 +1754,73 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
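The rowBatchContext blocks in these plans describe how the vectorized reader lays out a batch: one column vector per data column, with partition columns appended after the data columns. A minimal sketch of that layout, using a deliberately small hypothetical schema (three data columns plus one partition column) rather than the 20-column table above; the VectorizedRowBatch and ColumnVector classes are real Hive classes, but the schema and the class wrapping them are illustrative only:

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RowBatchContextSketch {
  public static void main(String[] args) {
    // Hypothetical rowBatchContext: dataColumnCount: 3, partitionColumnCount: 1
    //   dataColumns: insert_num:int, c1:decimal(38,18), b:string
    //   partitionColumns: part:int
    VectorizedRowBatch batch = new VectorizedRowBatch(4);
    batch.cols[0] = new LongColumnVector();        // insert_num:int
    batch.cols[1] = new DecimalColumnVector(
        VectorizedRowBatch.DEFAULT_SIZE, 38, 18);  // c1:decimal(38,18)
    batch.cols[2] = new BytesColumnVector();       // b:string
    batch.cols[3] = new LongColumnVector();        // part:int, appended after data columns
    System.out.println("numCols=" + batch.numCols
        + " defaultSize=" + VectorizedRowBatch.DEFAULT_SIZE);
  }
}
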
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_lower_to_higher_numeric_group_decimal_to_float + Statistics: Num rows: 6 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 3, 4] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=6 width=71) - default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index 294a8f0..a331468 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -155,25 +155,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).insert_num SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, type:struct, comment:null), ] complex_struct1_c_txt.insert_num complex_struct1_c_txt.s1 complex_struct1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
detail select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,s1,b from part_change_various_various_struct1 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=6 width=155) - default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_struct1 + Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,s1,b from part_change_various_various_struct1 PREHOOK: type: QUERY @@ -419,25 +449,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).b SIMPLE [ POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).insert_num SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, type:struct, comment:null), ] complex_struct2_d_txt.insert_num complex_struct2_d_txt.b complex_struct2_d_txt.s2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s2 from part_add_various_various_struct2 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
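The struct plans above also exercise the negative case: when a SELECT projects a struct column, the Map work stays in row mode and the plan records a notVectorizedReason such as "Data type struct of Column[s1] not supported". A rough sketch of that check, under the assumption that it reduces to inspecting the column's TypeInfo category; TypeInfoUtils and Category are real Hive serde APIs, but this helper and its message formatting are hypothetical:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NotVectorizedReasonSketch {
  // Hypothetical helper mirroring the shape of the notVectorizedReason messages above.
  static String checkColumn(String typeName, String columnName) {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    if (typeInfo.getCategory() != Category.PRIMITIVE) {
      return "Data type " + typeName + " of Column[" + columnName + "] not supported";
    }
    return null; // primitive types vectorize
  }

  public static void main(String[] args) {
    System.out.println(checkColumn("struct<a:int,b:string>", "s1")); // not supported
    System.out.println(checkColumn("timestamp", "c1"));              // null, i.e. OK
  }
}
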
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=8 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=8 width=117) - default@part_add_various_various_struct2,part_add_various_various_struct2,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s2"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_various_various_struct2 + Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s2 from part_add_various_various_struct2 PREHOOK: type: QUERY @@ -607,25 +667,55 @@ POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).b SIMPL POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).insert_num SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] POSTHOOK: Lineage: part_add_to_various_various_struct4 PARTITION(part=1).s3 SIMPLE [(complex_struct4_c_txt)complex_struct4_c_txt.FieldSchema(name:s3, type:struct, comment:null), ] complex_struct4_c_txt.insert_num complex_struct4_c_txt.b complex_struct4_c_txt.s3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,b,s3 from part_add_to_various_various_struct4 POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_add_to_various_various_struct4 + Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + vectorized: false -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=4 width=4) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=4 width=88) - default@part_add_to_various_various_struct4,part_add_to_various_various_struct4,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","b","s3"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,b,s3 from part_add_to_various_various_struct4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out index 85116e7..964ce95 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out @@ -282,25 +282,73 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 boolean1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 tinyint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 int1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from 
part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=475) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 4759 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 (type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c47 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 55, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 55 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54] + dataColumns: insert_num:int, c1:boolean, c2:boolean, c3:boolean, c4:boolean, c5:boolean, c6:boolean, c7:boolean, c8:boolean, c9:boolean, c10:tinyint, c11:tinyint, c12:tinyint, c13:tinyint, c14:tinyint, c15:tinyint, c16:tinyint, c17:tinyint, c18:tinyint, c19:tinyint, c20:tinyint, c21:smallint, c22:smallint, c23:smallint, c24:smallint, c25:smallint, c26:smallint, c27:smallint, c28:smallint, c29:smallint, c30:smallint, c31:smallint, c32:int, c33:int, c34:int, c35:int, c36:int, c37:int, c38:int, c39:int, c40:int, c41:int, c42:int, c43:bigint, c44:bigint, c45:bigint, c46:bigint, c47:bigint, c48:bigint, c49:bigint, c50:bigint, c51:bigint, c52:bigint, c53:bigint, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY @@ -497,25 +545,73 @@ POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part= POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:decimal1, type:decimal(38,18), comment:null), ] POSTHOOK: Lineage: part_change_various_various_decimal_to_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 decimal1 
decimal1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 float1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 double1 _c34 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] - TableScan [TS_0] (rows=6 width=427) - default@part_change_various_various_decimal_to_double,part_change_various_various_decimal_to_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_decimal_to_double + Statistics: Num rows: 6 Data size: 2563 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: decimal(38,18)), c2 (type: decimal(38,18)), c3 (type: decimal(38,18)), c4 (type: decimal(38,18)), c5 (type: decimal(38,18)), c6 (type: decimal(38,18)), c7 (type: decimal(38,18)), c8 (type: decimal(38,18)), c9 (type: decimal(38,18)), c10 (type: decimal(38,18)), c11 (type: decimal(38,18)), c12 (type: float), c13 (type: float), c14 (type: float), c15 (type: float), c16 (type: float), c17 (type: float), c18 (type: float), c19 (type: float), c20 (type: float), c21 (type: float), c22 (type: float), c23 (type: double), c24 (type: double), c25 (type: double), c26 (type: double), c27 (type: double), c28 (type: double), c29 (type: double), c30 (type: double), c31 (type: double), c32 (type: double), c33 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 35, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 35 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + dataColumns: insert_num:int, c1:decimal(38,18), c2:decimal(38,18), c3:decimal(38,18), c4:decimal(38,18), c5:decimal(38,18), c6:decimal(38,18), c7:decimal(38,18), c8:decimal(38,18), c9:decimal(38,18), c10:decimal(38,18), c11:decimal(38,18), c12:float, c13:float, c14:float, c15:float, c16:float, c17:float, c18:float, c19:float, c20:float, c21:float, c22:float, c23:double, c24:double, c25:double, c26:double, c27:double, c28:double, c29:double, c30:double, c31:double, c32:double, c33:double, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,b from part_change_various_various_decimal_to_double PREHOOK: type: QUERY @@ -628,25 +724,73 @@ POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c8 SI POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).c9 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:timestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: part_change_various_various_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c13 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
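One pattern worth noting in the Select Vectorization lines above: projectedOutputColumns such as [0, 35, 1, ..., 34] reorder physical batch columns into the query's output order. Because partition columns sit after the data columns in the batch, selecting insert_num, part, c1, ... pulls the partition column (physical slot 35 in that plan) into logical slot 1. A small self-contained illustration of that mapping, with an abbreviated hypothetical schema:

import java.util.Arrays;

public class ProjectionOrderSketch {
  public static void main(String[] args) {
    // Physical batch layout: data columns first, partition column last (hypothetical 4+1 schema).
    String[] physical = {"insert_num", "c1", "c2", "b", "part"};
    // Vectorized SELECT projection for "select insert_num, part, c1, c2, b":
    int[] projectedOutputColumns = {0, 4, 1, 2, 3};
    String[] logical = new String[projectedOutputColumns.length];
    for (int i = 0; i < projectedOutputColumns.length; i++) {
      logical[i] = physical[projectedOutputColumns[i]];
    }
    System.out.println(Arrays.toString(logical)); // [insert_num, part, c1, c2, b]
  }
}
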
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - TableScan [TS_0] (rows=6 width=145) - default@part_change_various_various_timestamp,part_change_various_various_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_timestamp + Statistics: Num rows: 6 Data size: 874 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: timestamp), c2 (type: timestamp), c3 (type: timestamp), c4 (type: timestamp), c5 (type: timestamp), c6 (type: timestamp), c7 (type: timestamp), c8 (type: timestamp), c9 (type: timestamp), c10 (type: timestamp), c11 (type: timestamp), c12 (type: timestamp), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + dataColumns: insert_num:int, c1:timestamp, c2:timestamp, c3:timestamp, c4:timestamp, c5:timestamp, c6:timestamp, c7:timestamp, c8:timestamp, c9:timestamp, c10:timestamp, c11:timestamp, c12:timestamp, b:string + partitionColumnCount: 1 + partitionColumns: part:int + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,b from part_change_various_various_timestamp PREHOOK: type: QUERY @@ -743,25 +887,73 @@ POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c3 SIMPLE POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).c4 SIMPLE [(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:date1, type:date, comment:null), ] POSTHOOK: Lineage: part_change_various_various_date PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data_2)schema_evolution_data_2.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num date1 date1 date1 date1 _c5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_various_various_date + Statistics: Num rows: 6 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: date), c2 (type: date), c3 (type: date), c4 (type: date), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 2, 3, 4, 5] + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: insert_num:int, c1:date, c2:date, c3:date, c4:date, b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - TableScan [TS_0] (rows=6 width=62) - default@part_change_various_various_date,part_change_various_various_date,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,b from part_change_various_various_date PREHOOK: type: QUERY @@ -939,25 +1131,73 @@ POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c5 S POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).c6 SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:c6, type:decimal(25,15), comment:null), ] POSTHOOK: Lineage: part_change_same_type_different_params PARTITION(part=2).insert_num SIMPLE [(same_type1_c_txt)same_type1_c_txt.FieldSchema(name:insert_num, type:int, comment:null), ] same_type1_c_txt.insert_num same_type1_c_txt.c1 same_type1_c_txt.c2 same_type1_c_txt.c3 
same_type1_c_txt.c4 same_type1_c_txt.c5 same_type1_c_txt.c6 same_type1_c_txt.b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_change_same_type_different_params + Statistics: Num rows: 13 Data size: 1311 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: insert_num (type: int), part (type: int), c1 (type: char(8)), c2 (type: char(32)), c3 (type: varchar(15)), c4 (type: varchar(18)), c5 (type: decimal(10,2)), c6 (type: decimal(25,15)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7] + dataColumns: insert_num:int, c1:char(8), c2:char(32), c3:varchar(15), c4:varchar(18), c5:decimal(10,2), c6:decimal(25,15), b:string + partitionColumnCount: 1 + partitionColumns: part:int -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=13 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - TableScan [TS_0] (rows=13 width=100) - default@part_change_same_type_different_params,part_change_same_type_different_params,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,b from part_change_same_type_different_params PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out index 04f2891..208ee4f 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out @@ -87,25 +87,72 @@ 
POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=5 width=20) - default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_permute_select + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,a,b from table_add_int_permute_select @@ -212,25 +259,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain 
+PREHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,a,b from table_add_int_string_permute_select POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=5 width=20) - default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_add_int_string_permute_select + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, a:int, b:string, c:int, d:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,a,b from table_add_int_string_permute_select @@ -407,25 +501,72 @@ POSTHOOK: Lineage: table_change_string_group_double.c2 EXPRESSION [(values__tmp_ POSTHOOK: Lineage: table_change_string_group_double.c3 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_change_string_group_double.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,b from table_change_string_group_double POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
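The table_add_int_permute_select and table_add_int_string_permute_select plans show column pruning interacting with schema evolution: dataColumnCount is 4 or 5 (the evolved schema including the added columns c and d), while includeColumns is only [0, 1, 2], so the newly added columns are never deserialized for this query. A trivial sketch of what that pruning means, using the exact dataColumns list from the plan above:

import java.util.Arrays;

public class IncludeColumnsSketch {
  public static void main(String[] args) {
    // rowBatchContext from the plan above: dataColumnCount: 5, includeColumns: [0, 1, 2]
    String[] dataColumns = {"insert_num:int", "a:int", "b:string", "c:int", "d:string"};
    int[] includeColumns = {0, 1, 2}; // only the SELECTed columns are read
    String[] read = Arrays.stream(includeColumns)
        .mapToObj(i -> dataColumns[i])
        .toArray(String[]::new);
    // The added columns c and d are never materialized for this query.
    System.out.println(Arrays.toString(read));
  }
}
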
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=52) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=5 width=52) - default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_string_group_double + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 264 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double PREHOOK: type: QUERY @@ -695,25 +836,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_gro POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_multi_ints_string_group.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. 
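Every Map Vectorization block in these golden files reports enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true, alongside the plan-level hive.vectorized.execution.enabled check; together they are what let these TEXTFILE tables with evolved schemas run vectorized at all. A minimal sketch of that configuration, assuming a test-style HiveConf setup (the ConfVars entries are, to the best of my knowledge, the real keys for these settings, but verify against the HiveConf version in use):

import org.apache.hadoop.hive.conf.HiveConf;

public class VectorizationConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // hive.vectorized.execution.enabled
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    // hive.vectorized.use.row.serde.deserialize
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE, true);
    System.out.println(
        conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED));
  }
}
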
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_multi_ints_string_group + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 755 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 22 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=151) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=5 width=151) - default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: 
select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -868,25 +1056,72 @@ POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: table_change_numeric_group_string_group_floating_string_group.insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: table_change_numeric_group_string_group_floating_string_group + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Select Operator + expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 1250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + dataColumns: insert_num:int, 
c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string + partitionColumnCount: 0 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=5 width=250) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=5 width=250) - default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index d4a9747..653274c 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,12 +128,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: dc (type: decimal(38,18)) outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: 
COMPLETE Column stats: NONE @@ -139,8 +157,21 @@ STAGE PLANS: value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index 3d00ade..6785248 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -33,9 +33,9 @@ POSTHOOK: Output: default@testvec POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY -POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 POSTHOOK: type: QUERY Plan optimized by CBO. diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 735e4f4..8e11112 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -43,48 +43,110 @@ POSTHOOK: Output: default@tbl2 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: 
false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -102,7 +164,7 @@ POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 22 PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select key, count(*) from @@ -113,7 +175,7 @@ select count(*) from ) subq2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select key, count(*) from @@ -123,46 +185,149 @@ select count(*) from group by key ) subq2 POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group 
By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_31] - Group By Operator [GBY_30] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_29] - Group By Operator [GBY_28] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_27] (rows=5 width=93) - Group By Operator [GBY_26] (rows=5 width=93) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=11 width=93) - Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_24] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_23] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - 
<-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -191,7 +356,7 @@ POSTHOOK: Input: default@tbl2 6 PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization expression select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -210,7 +375,7 @@ on src1.key = src2.key PREHOOK: type: QUERY POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization expression select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -227,71 +392,206 @@ join ) src2 on src1.key = src2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 llap - File Output Operator [FS_32] - Select Operator [SEL_31] (rows=5 width=102) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_49] (rows=5 width=102) - Conds:RS_51._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_51] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_42] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_41] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Reducer 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_53] - PartitionCols:_col0 - Group By Operator [GBY_52] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] - PartitionCols:_col0 - Group By Operator [GBY_24] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_47] (rows=11 width=93) - Conds:SEL_16._col0=SEL_19._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_19] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_44] (rows=10 width=93) - predicate:key is not null - TableScan [TS_17] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select 
Operator [SEL_16] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_43] (rows=10 width=93) - predicate:key is not null - TableScan [TS_14] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from ( @@ -339,7 +639,7 @@ POSTHOOK: Input: default@tbl2 9 1 1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -348,44 +648,106 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + 
vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -408,7 +770,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization expression select count(*) from ( select * from @@ -422,7 +784,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization expression select count(*) from ( select * from @@ -434,37 +796,99 @@ select count(*) from join tbl2 b on subq2.key = b.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -497,7 +921,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select * from ( @@ -518,7 +942,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select * from ( @@ -537,37 +961,99 @@ select count(*) from ) subq4 on subq2.key = subq4.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + 
mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -613,7 +1099,7 @@ POSTHOOK: Input: default@tbl1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -623,44 +1109,106 @@ PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 @@ -683,7 +1231,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side -- join should be performed -explain +explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join ( @@ -692,51 +1240,172 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized -- join should be performed -explain +explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_29] - Group By Operator [GBY_28] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_21] (rows=11 width=93) - Conds:RS_24._col0=RS_27._col0(Inner) - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_25] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 @@ -759,49 +1428,111 @@ POSTHOOK: Input: default@tbl2 22 PREHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
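Every plan in this golden file now opens with the PLAN VECTORIZATION header because plain explain was replaced by explain vectorization expression. A minimal sketch of the session setup such output assumes; the actual set commands live in the .q source, not in this hunk, and the settings are mirrored from the enabledConditionsMet lines in the plans themselves:

set hive.vectorized.execution.enabled=true;         -- PLAN VECTORIZATION: enabled
set hive.vectorized.execution.reduce.enabled=true;  -- Reduce Vectorization condition
set hive.execution.engine=tez;                      -- "hive.execution.engine tez IN [tez, spark]"
explain vectorization expression
select count(*) from
  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
    join tbl2 a on subq1.key = a.key;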
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -820,7 +1551,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -832,7 +1563,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -842,43 +1573,112 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value
expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_32] - Group By Operator [GBY_31] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - Group By Operator [GBY_14] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_28] (rows=6 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner),SEL_2._col0=SEL_8._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_26] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_8] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_27] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_6] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_25] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -907,7 +1707,7 @@ POSTHOOK: Input: default@tbl2 56 PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -922,7 +1722,7 @@ on subq2.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -935,37 +1735,99 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY -Plan optimized by CBO. 
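In the nested sub-query plan that follows, both predicates are pushed all the way down, so each TableScan carries the combined filter ((key < 8) and (key < 6)) and the row estimate drops to a single row before the merge join. A hedged sketch of the flattened query the optimizer effectively evaluates:

select count(*)
from tbl1 a
  join tbl2 b on a.key = b.key
where a.key < 8
  and a.key < 6;  -- both conjuncts survive into the scans shown below,
                  -- on tbl2 as well, via the transitive join condition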
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from @@ -1016,7 +1878,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to -- a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1025,49 +1887,107 @@ insert overwrite table dest2 select key, val1, val2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to -- a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 POSTHOOK: type: QUERY -Plan not optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Execution mode: llap -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Map 1 llap - File Output Operator [FS_9] - table:{"name:":"default.dest1"} - Select Operator [SEL_8] (rows=11 width=93) - Output:["_col0","_col1"] - Select Operator [SEL_7] (rows=11 width=93) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_16] (rows=11 width=93) - Conds:FIL_14.key=FIL_15.key(Inner),Output:["_col0","_col1","_col6"] - <-Filter Operator [FIL_15] (rows=10 width=93) - predicate:key is not null - TableScan [TS_1] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Filter Operator [FIL_14] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - File Output Operator [FS_11] - table:{"name:":"default.dest2"} - Please refer to the previous Select Operator [SEL_7] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -1172,7 +2092,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. -- It should be converted to a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1181,59 +2101,152 @@ insert overwrite table dest2 select key, count(*) group by key PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. -- It should be converted to a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key POSTHOOK: type: QUERY -Plan not optimized by CBO. 
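As with the single-insert case, converting this multi-insert join to a sort-merge join presupposes that tbl1 and tbl2 are bucketed and sorted on key and stored as ORC (the map vectorization summaries earlier in this file report OrcInputFormat). The defining DDL sits in the .q source rather than in this hunk; a sketch of its likely shape, with the bucket count purely illustrative:

create table tbl1 (key int, value string)
  clustered by (key) sorted by (key) into 2 buckets  -- bucket count illustrative
  stored as orc;
create table tbl2 (key int, value string)
  clustered by (key) sorted by (key) into 2 buckets
  stored as orc;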
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + table: 
+ input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 2 vectorized, llap - File Output Operator [FS_25] - table:{"name:":"default.dest2"} - Select Operator [SEL_24] (rows=5 width=93) - Output:["_col0","_col1"] - Group By Operator [GBY_23] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - File Output Operator [FS_9] - table:{"name:":"default.dest1"} - Merge Join Operator [MERGEJOIN_21] (rows=11 width=93) - Conds:FIL_19.key=FIL_20.key(Inner),Output:["_col0","_col1"] - <-Filter Operator [FIL_20] (rows=10 width=93) - predicate:key is not null - TableScan [TS_1] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Filter Operator [FIL_19] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - SHUFFLE [RS_12] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_10] (rows=11 width=93) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_21] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Stage: Stage-5 + Stats-Aggr Operator PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index 6b59497..739d0e1 100644 --- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -93,9 +97,16 @@ STAGE PLANS: TableScan alias: tint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -103,6 +114,11 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Not empty key IS false outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -110,9 +126,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 5] + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -120,21 +144,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: smallint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -180,13 +232,17 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 3 10 1 NoOk 4 4 10 10 Ok Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -204,9 +260,16 @@ STAGE PLANS: TableScan alias: tint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -214,19 +277,35 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Not empty key IS false outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> boolean predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) 
outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3] Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -234,21 +313,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: smallint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 0d4425b..993a62f 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,28 +37,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] 
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -67,10 +109,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -88,32 +134,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -126,10 +218,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,28 +243,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -181,10 +315,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -202,32 +340,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
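A note on the literals in the date plans of vector_between_in.q.out: vectorized DATE columns are held as days since the Unix epoch in long column vectors, which is why cdate IN (1969-10-26, 1969-07-14) compiles to FilterLongColumnInList(col 3, values [-67, -171]) above, and why the BETWEEN range 1969-12-30 through 1970-01-02 below becomes left -2, right 1. The encoding is easy to sanity-check with datediff; a hedged check, not part of the golden output:

select datediff('1969-10-26', '1970-01-01'),  -- -67
       datediff('1969-07-14', '1970-01-01'),  -- -171
       datediff('1970-01-02', '1970-01-01');  -- 1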
nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -240,10 +424,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -261,28 +449,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -295,10 +521,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -316,28 +546,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -350,10 +618,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -371,28 +643,66 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -405,10 +715,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -426,32 +740,78 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -711,12 +1071,16 @@ POSTHOOK: Input: default@decimal_date_test 6172 PREHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY POSTHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -734,12 +1098,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: 
VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -748,21 +1127,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -775,10 +1183,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -796,12 +1208,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 
1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -810,21 +1237,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -837,10 +1293,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) 
from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -858,12 +1318,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -872,21 +1347,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -899,10 +1403,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab 
GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -920,12 +1428,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -934,21 +1457,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File 
Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index a510e38..ce05391 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type: POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -155,6 +159,12 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary + vectorized: false Map 3 Map Operator Tree: TableScan @@ -175,16 +185,38 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + 
native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -208,16 +240,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### -27832781952 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -235,12 +271,26 @@ STAGE PLANS: TableScan alias: hundredorc Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: bin (type: binary) outputColumnNames: bin + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 10 + native: false + projectedOutputColumns: [0] keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 @@ -249,15 +299,41 @@ STAGE PLANS: key expressions: _col0 (type: binary) sort order: + Map-reduce partition columns: _col0 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: binary) mode: mergepartial outputColumnNames: _col0, _col1 @@ -265,9 +341,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: bigint), _col0 (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 50 Data size: 14819 Basic 
stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -320,16 +403,20 @@ POSTHOOK: Input: default@hundredorc 3 zync studies PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -347,12 +434,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -360,6 +458,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 @@ -367,9 +469,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10, 11] Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -377,26 +486,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map 
Operator Tree: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out index 05fac27..6adec76 100644 --- ql/src/test/results/clientpositive/llap/vector_bround.q.out +++ ql/src/test/results/clientpositive/llap/vector_bround.q.out @@ -34,19 +34,22 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_vector_bround POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY -POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround POSTHOOK: type: QUERY Plan optimized by CBO. 
Stage-0 Fetch Operator limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1"] - TableScan [TS_0] - Output:["v0","v1"] + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=8 width=16) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=8 width=16) + default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"] PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_bucket.q.out ql/src/test/results/clientpositive/llap/vector_bucket.q.out index 814ac75..c2af524 100644 --- ql/src/test/results/clientpositive/llap/vector_bucket.q.out +++ ql/src/test/results/clientpositive/llap/vector_bucket.q.out @@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@non_orc_table -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -42,15 +46,34 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1] + selectExpressions: VectorUDFAdaptor(UDFToInteger(VALUE._col0)) -> 2:Long Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index cd67e7e..14a10fc 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] 
-PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -129,12 +133,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -148,8 +167,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -165,16 +197,33 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index b7b2ba5..59aea35 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -47,18 +47,22 @@ val_10 10 1 val_100 200 2 val_103 206 2 val_104 208 2 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,12 +81,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:Long Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -91,16 +110,43 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN 
IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -108,21 +154,43 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -179,18 +247,22 @@ val_97 194 2 val_96 96 1 val_95 190 2 val_92 92 1 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select 
value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -209,12 +281,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:Long Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -223,16 +310,43 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -240,21 +354,43 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_char_4.q.out ql/src/test/results/clientpositive/llap/vector_char_4.q.out index 6d55ab0..d164ebe 100644 --- ql/src/test/results/clientpositive/llap/vector_char_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -143,12 +147,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), 
CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: name: default.char_lazy_binary_columnar Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 1af8b3d..57ae96b 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@char_join1_str_orc POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -148,12 +152,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -161,6 +176,10 @@ STAGE PLANS: keys: 0 _col1 (type: char(10)) 1 _col1 (type: char(10)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS 
true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -168,39 +187,89 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(10)) sort order: + Map-reduce partition columns: _col1 (type: char(10)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data 
size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -231,11 +300,15 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,32 +327,66 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(20)) sort order: + Map-reduce partition columns: _col1 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(20)) outputColumnNames: _col0, _col1 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -287,6 +394,10 @@ STAGE PLANS: keys: 0 _col1 (type: char(20)) 1 _col1 (type: char(20)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -294,19 +405,46 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -339,11 +477,15 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -362,12 +504,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -375,6 +528,11 @@ STAGE PLANS: keys: 0 UDFToString(_col1) (type: string) 1 _col1 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: CastStringGroupToString(col 1) -> 2:String + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -382,39 +540,89 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: 
VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_char_simple.q.out ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index 3dea73d..73b7759 100644 --- ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,51 +66,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: + - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + 
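The char-with-string join above only vectorizes cleanly after the planner rewrites the big-table key as UDFToString(_col1) (with bigTableKeyExpressions: CastStringGroupToString in the vectorized form): char(n) values compare as if blank-padded to their declared length, so they have to be converted before they can hash- and equality-match raw string keys. A minimal sketch of that padding mismatch, using hypothetical helper names rather than Hive's real vector expression classes:

public class CharVsStringKey {
  // char(n) semantics: the stored value behaves as if blank-padded to length n.
  static String asChar(String v, int n) {
    StringBuilder sb = new StringBuilder(v);
    while (sb.length() < n) sb.append(' ');
    return sb.substring(0, n);
  }

  // What the cast to string effectively does to a char join key:
  // strip the trailing pad blanks so it can match a plain string.
  static String charToString(String charValue) {
    int end = charValue.length();
    while (end > 0 && charValue.charAt(end - 1) == ' ') end--;
    return charValue.substring(0, end);
  }

  public static void main(String[] args) {
    String charKey = asChar("abc", 10);  // "abc" plus seven pad blanks
    String stringKey = "abc";
    System.out.println(charKey.equals(stringKey));               // false
    System.out.println(charToString(charKey).equals(stringKey)); // true
  }
}

This is why the char/char and char/char(different length) joins earlier in this file key directly on the char columns, while the char/string variant inserts the cast on side 0.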
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -148,16 +133,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -165,51 +154,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: - - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 
Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -254,12 +224,16 @@ create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -269,68 +243,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 32 Basic 
stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: CAST( _col0 AS CHAR(12) (type: char(12)) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-2 - Dependency Collection Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-3 - Stats-Aggr Operator PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_coalesce.q.out ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index c7897f7..2789664 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -8,12 +8,16 @@ LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -21,53 +25,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) 
(type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: +++++ - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 1, 16] + selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:String, CastLongToString(col 1) -> 15:String) -> 16:string + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0, 1, 2, 3, 4] + selectExpressions: ConstantVectorExpression(val null) -> 5:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc @@ 
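In the plan above, the WHERE cdouble IS NULL predicate lets the optimizer fold the first COALESCE argument into ConstantVectorExpression(val null), so VectorCoalesce only ever falls through to the remaining children. A rough model of the per-row fall-through behavior — plain Java arrays standing in for Hive's column vectors, and none of these names are Hive's actual API:

import java.util.Arrays;

public class CoalesceSketch {
  // First non-null value across the child columns, per row.
  static String[] coalesce(String[][] columns, boolean[][] isNull, int n) {
    String[] out = new String[n];            // stays null if every child is null
    for (int i = 0; i < n; i++) {
      for (int c = 0; c < columns.length; c++) {
        if (!isNull[c][i]) { out[i] = columns[c][i]; break; }
      }
    }
    return out;
  }

  public static void main(String[] args) {
    String[][] cols = {
      { null, null, null },   // the folded constant-null child
      { "x",  null, null },   // e.g. cstring1
      { "1",  "2",  null } }; // e.g. cint cast to string
    boolean[][] nulls = {
      { true,  true,  true },
      { false, true,  true },
      { false, false, true } };
    System.out.println(Arrays.toString(coalesce(cols, nulls, 3))); // [x, 2, null]
  }
}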
-95,18 +108,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -114,53 +131,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: +++ - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 0) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 15] + selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 3:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -188,18 +214,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -207,50 +237,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, 
SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val 0.0) -> 2:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -278,18 +319,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -297,53 +342,61 @@ STAGE DEPENDENCIES: STAGE 
PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Limit Vectorization: + className: 
VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc @@ -371,18 +424,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -390,50 +447,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false 
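The nativeConditionsMet / nativeConditionsNotMet lines that recur through these Reduce Sink Vectorization blocks are a partition of named boolean checks, each printed as "<condition> IS <value>"; when any check fails, the plan falls back to the non-native VectorReduceSinkOperator. A small illustration of that reporting shape — the condition names are copied from the plans above, but the class itself is hypothetical, not part of Hive:

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.StringJoiner;

public class NativeConditionReport {
  public static void main(String[] args) {
    Map<String, Boolean> checks = new LinkedHashMap<>();
    checks.put("hive.vectorized.execution.reducesink.new.enabled", true);
    checks.put("No buckets", true);
    checks.put("No TopN", false);      // a TopN Hash is present, so not native
    checks.put("Uniform Hash", false);
    StringJoiner met = new StringJoiner(", ");
    StringJoiner notMet = new StringJoiner(", ");
    for (Map.Entry<String, Boolean> e : checks.entrySet()) {
      (e.getValue() ? met : notMet).add(e.getKey() + " IS " + e.getValue());
    }
    System.out.println("nativeConditionsMet: " + met);
    System.out.println("nativeConditionsNotMet: " + notMet);
  }
}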
+ usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), null (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val null) -> 2:float + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc @@ -461,34 +529,61 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 3) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 0, 14] + selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: cbigint is null (type: boolean) - Select Operator - expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint) - outputColumnNames: 
-              Limit
-                Number of rows: 10
-                ListSink
 
 PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
 FROM alltypesorc
diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
index b390bfd..67f21d6 100644
--- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
@@ -16,18 +16,22 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@str_str_orc
 POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -106,14 +110,18 @@ POSTHOOK: Input: default@str_str_orc
 #### A masked pattern was here ####
 X 0.02
 y 0.0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -162,18 +170,22 @@ POSTHOOK: Input: default@str_str_orc
 0
 1
 0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -191,12 +203,27 @@ STAGE PLANS:
                 TableScan
                   alias: str_str_orc
                   Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 4]
+                        selectExpressions: VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:Long
                     Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 4) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 1
+                          native: false
+                          projectedOutputColumns: [0]
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -205,15 +232,41 @@ STAGE PLANS:
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
@@ -221,9 +274,17 @@ STAGE PLANS:
                Select Operator
                  expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double)
                  outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 2]
+                      selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double
                  Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -252,14 +313,18 @@ POSTHOOK: Input: default@str_str_orc
 #### A masked pattern was here ####
 X 0.02
 y 0.0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -274,12 +339,23 @@ STAGE PLANS:
                 TableScan
                   alias: str_str_orc
                   Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: COALESCE(str1,0) (type: string)
                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string
                    Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -287,6 +363,14 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
index 08d49bc..cf72bc5 100644
--- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
+++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
@@ -62,14 +62,18 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create
 POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ]
 orc_create_staging.str orc_create_staging.mp orc_create_staging.lst orc_create_staging.strct
 PREHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT * FROM orc_create_complex
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT * FROM orc_create_complex
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -97,6 +101,12 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type map of Column[mp] not supported
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -113,18 +123,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
 orc_create_complex.str orc_create_complex.mp orc_create_complex.lst orc_create_complex.strct
-line1 {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] {"a":"one","b":"two"}
-line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"}
-line3 {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"}
+line1 {"key13":"value13","key12":"value12","key11":"value11"} ["a","b","c"] {"a":"one","b":"two"}
+line2 {"key21":"value21","key23":"value23","key22":"value22"} ["d","e","f"] {"a":"three","b":"four"}
+line3 {"key33":"value33","key31":"value31","key32":"value32"} ["g","h","i"] {"a":"five","b":"six"}
 PREHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT COUNT(*) FROM orc_create_complex
 PREHOOK: type: QUERY
 POSTHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT COUNT(*) FROM orc_create_complex
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -142,29 +156,71 @@ STAGE PLANS:
                TableScan
                  alias: orc_create_complex
                  Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: []
                    Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -188,14 +244,18 @@ POSTHOOK: Input: default@orc_create_complex
 c0
 3
 PREHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT str FROM orc_create_complex ORDER BY str
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT str FROM orc_create_complex ORDER BY str
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -213,25 +273,59 @@ STAGE PLANS:
                TableScan
                  alias: orc_create_complex
                  Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: str (type: string)
                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
index 97d5642..99fd25f 100644
--- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
@@ -21,13 +21,17 @@ POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.a SIMPLE []
 POSTHOOK: Lineage: test.b EXPRESSION []
 c0 c1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from alltypesorc join test where alltypesorc.cint=test.a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from alltypesorc join test where alltypesorc.cint=test.a
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,6 +75,12 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported
+                vectorized: false
        Map 2 
            Map Operator Tree:
                TableScan
@@ -91,6 +101,12 @@ STAGE PLANS:
                      value expressions: _col1 (type: map)
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type map of Column[b] not supported
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -146,13 +162,17 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test2b
 POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select *  from test2b join test2a on test2b.a = test2a.a[1]
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select *  from test2b join test2a on test2b.a = test2a.a[1]
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -196,6 +216,12 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Small Table expression for MAPJOIN operator: Data type array of Column[a] not supported
+                vectorized: false
        Map 2 
            Map Operator Tree:
                TableScan
@@ -212,6 +238,12 @@ STAGE PLANS:
                      value expressions: a (type: array)
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: UDF GenericUDFIndex(Column[a], Const int 1) not supported
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out
index 3b9d9f9..c425d8f 100644
--- ql/src/test/results/clientpositive/llap/vector_count.q.out
+++ ql/src/test/results/clientpositive/llap/vector_count.q.out
@@ -47,10 +47,14 @@ POSTHOOK: Input: default@abcd
 12 100 75 7
 12 NULL 80 2
 NULL 35 23 6
-PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,12 +72,26 @@ STAGE PLANS:
                TableScan
                  alias: abcd
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                    outputColumnNames: a, b, c, d
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                    Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0, col 1, col 2
+                          native: false
+                          projectedOutputColumns: [0, 1, 2]
                      keys: a (type: int), b (type: int), c (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -82,12 +100,30 @@ STAGE PLANS:
                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                        Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col5 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2)
@@ -121,10 +157,14 @@ POSTHOOK: Input: default@abcd
 100 1 1 3
 12 1 2 9
 NULL 1 1 6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -142,12 +182,26 @@ STAGE PLANS:
                TableScan
                  alias: abcd
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                    outputColumnNames: _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                    Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 0, col 1, col 2, col 3
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                      keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -155,12 +209,30 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                        sort order: ++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                        Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
@@ -190,10 +262,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4
-PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -211,20 +287,45 @@ STAGE PLANS:
                TableScan
                  alias: abcd
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                    outputColumnNames: a, b, c, d
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                    Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: a (type: int), b (type: int), c (type: int)
                      sort order: +++
                      Map-reduce partition columns: a (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                      Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                      value expressions: d (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0)
@@ -258,10 +359,14 @@ POSTHOOK: Input: default@abcd
 100 1 1 3
 12 1 2 9
 NULL 1 1 6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -279,18 +384,43 @@ STAGE PLANS:
                TableScan
                  alias: abcd
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                    outputColumnNames: _col1, _col2, _col3, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3]
                    Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                      sort order: ++++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false
                      Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 3d67664..9e185c6 100644
--- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ]
 PREHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
 POSTHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1257,11 +1261,24 @@ STAGE PLANS:
                TableScan
                  alias: web_sales
                  Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
                  Select Operator
                    expressions: ws_order_number (type: int)
                    outputColumnNames: ws_order_number
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [16]
                    Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 16
+                          native: false
+                          projectedOutputColumns: []
                      keys: ws_order_number (type: int)
                      mode: hash
                      outputColumnNames: _col0
@@ -1270,36 +1287,88 @@ STAGE PLANS:
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1000 Data size: 1760000 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(_col0)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 0) -> bigint
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      native: false
+                      projectedOutputColumns: [0]
                  mode: hash
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: Uniform Hash IS false
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out
index a7a74c3..fc82926 100644
--- ql/src/test/results/clientpositive/llap/vector_data_types.q.out
+++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -193,10 +197,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -214,30 +222,67 @@ STAGE PLANS:
                TableScan
                  alias: over1korc
                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                  Select Operator
                    expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                    Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
                      sort order: +++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                      Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                      TopN Hash Memory Usage: 0.1
                      value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
index 6faf453..1a31067 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
@@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
 -- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
     COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
     COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
   FROM decimal_vgby
@@ -33,13 +33,17 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
 -- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
    COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
    COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
  FROM decimal_vgby
  GROUP BY cint
 HAVING COUNT(*) > 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -57,12 +61,26 @@ STAGE PLANS:
                TableScan
                  alias: decimal_vgby
                  Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                  Select Operator
                    expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
                    outputColumnNames: cint, cdecimal1, cdecimal2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3, 1, 2]
                    Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 3
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                      keys: cint (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -71,28 +89,65 @@ STAGE PLANS:
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      predicateExpression: FilterLongColGreaterLongScalar(col 9, val 1) -> boolean
                  predicate: (_col9 > 1) (type: boolean)
                  Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14))
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                    Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -132,7 +187,7 @@ POSTHOOK: Input: default@decimal_vgby
 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250
 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360
6010604.30769230735360 PREHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby @@ -140,13 +195,17 @@ EXPLAIN SELECT cint, HAVING COUNT(*) > 1 PREHOOK: type: QUERY POSTHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -164,12 +223,27 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -182,8 +256,21 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out index 16d9929..5cce027 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out @@ -1,25 +1,76 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + 
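The pair of plans above brackets the decimal GROUP BY boundary cleanly: with COUNT/MIN/MAX/SUM only, both stages report vectorized: true, while adding AVG/STDDEV_POP/STDDEV_SAMP forces vectorOutput: false on the map side and leaves Reducer 2 in row mode, because those aggregates ship a struct-typed partial result (see the notVectorizedReason above). A minimal interactive probe of the same boundary — assuming the decimal_vgby table and the configuration listed under enabledConditionsMet — would be:

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;

    -- Both stages vectorize: all partial results are primitives.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cint, COUNT(cdecimal1), MIN(cdecimal1), MAX(cdecimal1), SUM(cdecimal1)
    FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1;

    -- Reducer falls back to row mode: AVG's partial result is a struct.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cint, AVG(cdecimal1)
    FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1;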
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
index 16d9929..5cce027 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
@@ -1,25 +1,76 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 638316 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean
+                    predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
+                    Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15]
+                          selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0)
+                      Statistics: Num rows: 5112 Data size: 2555740 Basic stats: COMPLETE Column stats: COMPLETE
+                      Limit
+                        Number of rows: 10
+                        Limit Vectorization:
+                            className: VectorLimitOperator
+                            native: true
+                        Statistics: Num rows: 10 Data size: 5008 Basic stats: COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 10 Data size: 5008 Basic stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: 10
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
-            Select Operator
-              expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-              Limit
-                Number of rows: 10
-                ListSink
+        ListSink
 
 PREHOOK: query: SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
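These outputs all use the EXPRESSION detail level, which is what prints selectExpressions entries such as CastDoubleToDecimal(col 5) -> 12:decimal(20,10). As a sketch of the surrounding syntax (the other level keywords are stated from the feature's grammar rather than from these outputs, so treat their exact spelling as an assumption):

    -- SUMMARY (default level): PLAN VECTORIZATION plus per-vertex summaries only
    EXPLAIN VECTORIZATION
    SELECT CAST(cdouble AS DECIMAL(20,10)) FROM alltypesorc LIMIT 10;

    -- EXPRESSION: adds the per-operator expression mappings shown above
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT CAST(cdouble AS DECIMAL(20,10)) FROM alltypesorc LIMIT 10;

    -- ONLY suppresses the non-vectorization parts of the plan
    EXPLAIN VECTORIZATION ONLY SUMMARY
    SELECT CAST(cdouble AS DECIMAL(20,10)) FROM alltypesorc LIMIT 10;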
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
index d37b973..16232c2 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out
@@ -15,14 +15,18 @@ POSTHOOK: Output: default@decimal_test
 POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -40,32 +44,74 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_test
                   Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1, val 0) -> boolean, FilterDecimalColLessDecimalScalar(col 1, val 12345.5678) -> boolean, FilterDecimalColNotEqualDecimalScalar(col 2, val 0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 2, val 1000) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                     predicate: ((cdecimal1 > 0) and (cdecimal1 < 12345.5678) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean)
                     Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,23)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,28)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+                          selectExpressions: DecimalColAddDecimalColumn(col 1, col 2) -> 3:decimal(25,14), DecimalColSubtractDecimalColumn(col 1, col 4)(children: DecimalScalarMultiplyDecimalColumn(val 2, col 2) -> 4:decimal(25,14)) -> 5:decimal(26,14), DecimalColDivideDecimalColumn(col 6, col 2)(children: DecimalColAddDecimalScalar(col 1, val 2.34) -> 6:decimal(21,10)) -> 7:decimal(38,23), DecimalColMultiplyDecimalColumn(col 1, col 8)(children: DecimalColDivideDecimalScalar(col 2, val 3.4) -> 8:decimal(28,18)) -> 9:decimal(38,28), DecimalColModuloDecimalScalar(col 1, val 10) -> 10:decimal(12,10), CastDecimalToLong(col 1) -> 11:long, CastDecimalToLong(col 2) -> 12:long, CastDecimalToLong(col 2) -> 13:long, CastDecimalToLong(col 1) -> 14:long, CastDecimalToBoolean(col 1) -> 15:long, CastDecimalToDouble(col 2) -> 16:double, CastDecimalToDouble(col 1) -> 17:double, CastDecimalToString(col 2) -> 18:String, CastDecimalToTimestamp(col 1) -> 19:timestamp
                       Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,23)), _col3 (type: decimal(38,28)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp)
                         sort order: ++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,23)), KEY.reducesinkkey3 (type: decimal(38,28)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
                Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
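Note how ORDER BY ... LIMIT 10 shapes the ReduceSink above: it is vectorized (VectorReduceSinkOperator) but not native, with nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false — the LIMIT makes it a TopN sink, and an order-by shuffle is not a uniform-hash partition. A sketch of the contrast, assuming the same decimal_test table (an expectation, not a recorded golden output):

    -- TopN present, as in the plan above:
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cdecimal1 + cdecimal2 AS c1 FROM decimal_test ORDER BY c1 LIMIT 10;

    -- Without LIMIT the TopN condition should clear; Uniform Hash still
    -- fails for a global sort, so the sink stays non-native:
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cdecimal1 + cdecimal2 AS c1 FROM decimal_test ORDER BY c1;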
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
index 361c46b..6666923 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
@@ -72,12 +72,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k
 POSTHOOK: Output: default@t2
 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -95,12 +99,23 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean
                     predicate: dec is not null (type: boolean)
                     Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: dec (type: decimal(4,2))
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -108,12 +123,21 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: decimal(6,2))
                           1 _col0 (type: decimal(6,2))
+                        Map Join Vectorization:
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
                         outputColumnNames: _col0, _col1
                         input vertices:
                           1 Map 2
                         Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
                           Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -121,25 +145,56 @@ STAGE PLANS:
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
                   alias: t2
                   Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean
                     predicate: dec is not null (type: boolean)
                     Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: dec (type: decimal(4,0))
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: decimal(6,2))
                         sort order: +
                         Map-reduce partition columns: _col0 (type: decimal(6,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
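The join above takes the fallback path: it vectorizes through the row-by-row VectorMapJoinOperator purely because of the key type — every native condition is met except Supports Key Types (nativeNotSupportedKeyTypes: DECIMAL). A contrasting probe on a non-decimal key; t1i/t2i and column k are hypothetical stand-ins, and the expected outcome is an inference from the conditions listed above:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT a.k, b.k FROM t1i a JOIN t2i b ON (a.k = b.k);
    -- Expectation: Map Join Vectorization reports native: true once the
    -- key passes the Supports Key Types check (e.g. a BIGINT key).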
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
index 6d5b578..4a2ae48 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.F
 POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end.
-explain
+explain vectorization expression
 select
    cdecimal1
   ,Round(cdecimal1, 2)
@@ -53,7 +53,7 @@ and sin(cdecimal1) >= -1.0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end.
-explain
+explain vectorization expression
 select
    cdecimal1
   ,Round(cdecimal1, 2)
@@ -90,22 +90,69 @@ where cbigint % 500 = 0  -- test use of a math function in the WHERE clause
 and sin(cdecimal1) >= -1.0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: decimal_test
+                  Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 4, val 0)(children: LongColModuloLongScalar(col 0, val 500) -> 4:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 6, val -1.0)(children: FuncSinDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> boolean) -> boolean
+                    predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean)
+                    Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 7, 8, 9, 10, 5, 11, 12, 13, 15, 6, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 2, 28, 4, 29]
+                          selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 2, decimalPlaces 2) -> 7:decimal(13,2), FuncRoundDecimalToDecimal(col 2) -> 8:decimal(11,0), FuncFloorDecimalToDecimal(col 2) -> 9:decimal(11,0), FuncCeilDecimalToDecimal(col 2) -> 10:decimal(11,0), RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 58)(children: FuncExpDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> 5:double, FuncLnDoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 11:double, FuncLog10DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 12:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 13:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 14)(children: DecimalColSubtractDecimalScalar(col 2, val 15601) -> 14:decimal(21,10)) -> 6:double) -> 15:double, VectorUDFAdaptor(log(2, cdecimal1)) -> 6:Double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 16)(children: CastDecimalToDouble(col 2) -> 16:double) -> 17:double) -> 16:Double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 17)(children: CastDecimalToDouble(col 2) -> 17:double) -> 18:double) -> 17:Double, FuncSqrtDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 19:double, FuncAbsDecimalToDecimal(col 2) -> 20:decimal(20,10), FuncSinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 21:double, FuncASinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 22:double, FuncCosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 23:double, FuncACosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 24:double, FuncATanDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 25:double, FuncDegreesDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 26:double, FuncRadiansDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 27:double, FuncNegateDecimalToDecimal(col 2) -> 28:decimal(20,10), FuncSignDecimalToLong(col 2) -> 4:long, FuncCosDoubleToDouble(col 18)(children: DoubleColAddDoubleScalar(col 29, val 3.14159)(children: DoubleColUnaryMinus(col 18)(children: FuncSinDoubleToDouble(col 29)(children: FuncLnDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 29:double) -> 18:double) -> 29:double) -> 18:double) -> 29:double
+                      Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: decimal_test
-          Filter Operator
-            predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean)
-            Select Operator
-              expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
-              ListSink
+        ListSink
 
 PREHOOK: query: select
    cdecimal1
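This plan is also the only one in the set with usesVectorUDFAdaptor: true: log(2, cdecimal1) and power(log2(cdecimal1), 2) have no dedicated vectorized class and show up in selectExpressions as VectorUDFAdaptor(...), while the single-argument ln/log10/log2/sqrt/sin and friends map to dedicated Func*DoubleToDouble implementations. A narrowed query that should reproduce just the adaptor entries, assuming the same decimal_test table:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT log(2, cdecimal1), power(log2(cdecimal1), 2), ln(cdecimal1)
    FROM decimal_test WHERE cbigint % 500 = 0;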
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index 5583874..90f80a3 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -545,10 +545,14 @@ NULL	NULL
 123456789.0123456789	15241578753238836.75019051998750190521
 1234567890.1234560000	NULL
 1234567890.1234567890	NULL
-PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -566,12 +570,26 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_precision
                   Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Select Operator
                     expressions: dec (type: decimal(20,10))
                     outputColumnNames: dec
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: avg(dec), sum(dec)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18)
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: [0, 1]
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
@@ -581,8 +599,21 @@ STAGE PLANS:
                        value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), sum(VALUE._col1)
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
index 4063199..b99935d 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
@@ -30,12 +30,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_txt
 #### A masked pattern was here ####
 101
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by dec
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by dec
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -64,15 +68,33 @@ STAGE PLANS:
                        value expressions: _col1 (type: decimal(11,0))
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -94,12 +116,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_txt
 #### A masked pattern was here ####
 101	100
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,15 +154,33 @@ STAGE PLANS:
                        value expressions: _col0 (type: decimal(10,0))
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -186,12 +230,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_rc
 #### A masked pattern was here ####
 101
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by dec
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by dec
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -220,15 +268,33 @@ STAGE PLANS:
                        value expressions: _col1 (type: decimal(11,0))
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -250,12 +316,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_rc
 #### A masked pattern was here ####
 101	100
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -284,15 +354,33 @@ STAGE PLANS:
                        value expressions: _col0 (type: decimal(10,0))
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -342,12 +430,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_orc
 #### A masked pattern was here ####
 101
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by dec
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by dec
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -365,26 +457,61 @@ STAGE PLANS:
                TableScan
                  alias: decimal_tbl_orc
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                  Select Operator
                    expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0))
                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
+                        selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0)
                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: decimal(10,0))
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col1 (type: decimal(11,0))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -406,12 +533,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_orc
 #### A masked pattern was here ####
 101	100
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -429,26 +560,60 @@ STAGE PLANS:
                TableScan
                  alias: decimal_tbl_orc
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                  Select Operator
                    expressions: dec (type: decimal(10,0))
                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: round(_col0, -1) (type: decimal(11,0))
                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: decimal(10,0))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0))
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0]
                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
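Across the four storage variants of this round test, only the ORC tables get a vectorized map side; the text and RCFile tables report enabled: false with the blocking switch named directly (hive.vectorized.use.vector.serde.deserialize IS false for TextInputFormat, hive.vectorized.use.row.serde.deserialize IS false for RCFileInputFormat), while the reducers vectorize in every case. The named condition points at the obvious experiment (an expectation, not a recorded golden output):

    SET hive.vectorized.use.vector.serde.deserialize=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT dec, round(dec, -1) FROM decimal_tbl_txt ORDER BY dec;
    -- Map Vectorization should now report enabled: true, with text rows
    -- deserialized directly into vectorized row batches.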
1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE 
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -153,7 +192,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_2_orc
 #### A masked pattern was here ####
 125.315000000000000000
 -125.315000000000000000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(pos) as p, round(pos, 0),
 round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -163,7 +202,7 @@ SELECT
 round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4)
 FROM decimal_tbl_2_orc ORDER BY p
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(pos) as p, round(pos, 0),
 round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -173,6 +212,10 @@ SELECT
 round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4)
 FROM decimal_tbl_2_orc ORDER BY p
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -190,26 +233,61 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_tbl_2_orc
                   Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0))
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+                        selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0)
                     Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(21,0))
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
                 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -273,7 +351,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_3_orc
 #### A masked pattern was here ####
 3.141592653589793000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(dec, -15) as d, round(dec, -16),
 round(dec, -13), round(dec, -14),
@@ -294,7 +372,7 @@ SELECT
 round(dec, 15), round(dec, 16)
 FROM decimal_tbl_3_orc ORDER BY d
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(dec, -15) as d, round(dec, -16),
 round(dec, -13), round(dec, -14),
@@ -315,6 +393,10 @@ SELECT
 round(dec, 15), round(dec, 16)
 FROM decimal_tbl_3_orc ORDER BY d
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -332,26 +414,61 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_tbl_3_orc
                   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Select Operator
                     expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
                     outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+                        selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 10:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 11:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 12:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 14:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 15:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 16:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 17:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 18:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 19:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 20:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 21:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 22:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 23:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 24:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 25:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 26:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 27:decimal(37,16), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 28:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 29:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 30:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 31:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 32:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 33:decimal(21,0)
                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(21,0))
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16))
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32]
                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -438,14 +555,18 @@ POSTHOOK: type: QUERY
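One pattern worth noting across the rounding plans above: the output types are consistent with decimal(30,10) inputs, and round(col, d) always comes out as decimal(21 + s, s) with s = max(0, d) — one extra integer digit of carry headroom (99.9 rounding up to 100) plus the requested scale, with all negative d collapsing to scale 0 and the precision capped at Hive's 38-digit maximum. A small sketch of that rule, inferred from these plans rather than taken from Hive's GenericUDFRound source:

// Rule inferred from the q.out types above: for decimal(p, s) rounded to
// d places, scale' = max(0, d) and precision' = (p - s) + 1 + scale',
// capped at the 38-digit decimal maximum.
public class RoundTypeSketch {
    static String roundedType(int p, int s, int d) {
        int scale = Math.max(0, d);
        int precision = Math.min(38, (p - s) + 1 + scale);
        return "decimal(" + precision + "," + scale + ")";
    }

    public static void main(String[] args) {
        System.out.println(roundedType(30, 10, 2));   // decimal(23,2)
        System.out.println(roundedType(30, 10, -4));  // decimal(21,0)
        System.out.println(roundedType(30, 10, 16));  // decimal(37,16)
    }
}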
 POSTHOOK: Input: default@decimal_tbl_4_orc
 #### A masked pattern was here ####
 1809242.315111134400000000
 -1809242.315111134400000000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
 FROM decimal_tbl_4_orc ORDER BY p
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
 FROM decimal_tbl_4_orc ORDER BY p
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -463,26 +584,62 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_tbl_4_orc
                   Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9))
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3]
+                        selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9)
                     Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(30,9))
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: decimal(30,9))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9))
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
+                    selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9)
                Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
index 40dc99a..faf8508 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
@@ -49,11 +49,15 @@ POSTHOOK: Output: default@decimal_udf
 POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ]
 POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ]
 PREHOOK: query: -- addition
-EXPLAIN SELECT key + key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- addition
-EXPLAIN SELECT key + key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key + key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,12 +72,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key + key) (type: decimal(21,10))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
+                        selectExpressions: DecimalColAddDecimalColumn(col 0, col 0) -> 2:decimal(21,10)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -81,6 +96,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -134,10 +157,14 @@ NULL
 2.0000000000
 -2469135780.2469135780
 2469135780.2469135600
-PREHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + value FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + value FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -152,12 +179,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DecimalColAddDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(21,10)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -165,6 +203,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -218,10 +264,14 @@ NULL
 2.0000000000
 -2469135780.1234567890
 2469135780.1234567800
-PREHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + (value/2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + (value/2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -236,12 +286,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) + (UDFToDouble(value) / 2.0)) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColAddDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -249,6 +310,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -302,10 +371,14 @@ NULL
 1.5
 -1.8518518351234567E9
 1.8518518351234567E9
-PREHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + '1.0' FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + '1.0' FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -320,12 +393,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) + 1.0) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColAddDoubleScalar(col 2, val 1.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -333,6 +417,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -387,11 +479,15 @@ NULL
 -1.2345678891234567E9
 1.2345678911234567E9
 PREHOOK: query: -- substraction
-EXPLAIN SELECT key - key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- substraction
-EXPLAIN SELECT key - key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key - key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -406,12 +502,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key - key) (type: decimal(21,10))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
+                        selectExpressions: DecimalColSubtractDecimalColumn(col 0, col 0) -> 2:decimal(21,10)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -419,6 +526,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -472,10 +587,14 @@ NULL
 0.0000000000
 0.0000000000
 0.0000000000
-PREHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - value FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - value FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -490,12 +609,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DecimalColSubtractDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(21,10)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -503,6 +633,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -556,10 +694,14 @@ NULL
 0.0000000000
 -0.1234567890
 0.1234567800
-PREHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - (value/2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - (value/2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -574,12 +716,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0)) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColSubtractDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -587,6 +740,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -640,10 +801,14 @@ NULL
 0.5
 -6.172839451234567E8
 6.172839451234567E8
-PREHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - '1.0' FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - '1.0' FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -658,12 +823,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) - 1.0) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColSubtractDoubleScalar(col 2, val 1.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -671,6 +847,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -725,11 +909,15 @@ NULL
 -1.2345678911234567E9
 1.2345678891234567E9
 PREHOOK: query: -- multiplication
-EXPLAIN SELECT key * key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiplication
-EXPLAIN SELECT key * key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION EXPRESSION SELECT key * key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -744,12 +932,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key * key) (type: decimal(38,20))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
+                        selectExpressions: DecimalColMultiplyDecimalColumn(col 0, col 0) -> 2:decimal(38,20)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -757,6 +956,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -810,10 +1017,14 @@ NULL
 1.00000000000000000000
 NULL
 NULL
-PREHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key, value FROM DECIMAL_UDF where key * value > 0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key, value FROM DECIMAL_UDF where key * value > 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -828,15 +1039,29 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterDecimalColGreaterDecimalScalar(col 3, val 0)(children: DecimalColMultiplyDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,10)) -> boolean
                     predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean)
                     Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: decimal(20,10)), value (type: int)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                       Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -844,6 +1069,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -882,10 +1115,14 @@ POSTHOOK: Input: default@decimal_udf
 1.0000000000	1
 -1234567890.1234567890	-1234567890
 1234567890.1234567800	1234567890
-PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * value FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * value FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -900,12 +1137,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DecimalColMultiplyDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,10)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -913,6 +1161,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -966,10 +1222,14 @@ NULL
 1.0000000000
 1524157875171467887.5019052100
 1524157875171467876.3907942000
-PREHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * (value/2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * (value/2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -984,12 +1244,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0)) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColMultiplyDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -997,6 +1268,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1050,10 +1329,14 @@ NULL
 0.5
 7.6207893758573389E17
 7.6207893758573389E17
-PREHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * '2.0' FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * '2.0' FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1068,12 +1351,23 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (UDFToDouble(key) * 2.0) (type: double)
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
+                        selectExpressions: DoubleColMultiplyDoubleScalar(col 2, val 2.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1081,6 +1375,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1135,11 +1437,15 @@ NULL
 -2.4691357802469134E9
 2.4691357802469134E9
 PREHOOK: query: -- division
-EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / 0 FROM DECIMAL_UDF limit 1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- division
-EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1
+EXPLAIN VECTORIZATION EXPRESSION SELECT key / 0 FROM DECIMAL_UDF limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1154,15 +1460,29 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_udf
                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (key / 0) (type: decimal(28,18))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
+                        selectExpressions: DecimalColDivideDecimalScalar(col 0, val 0) -> 2:decimal(28,18)
                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
                     Limit
                       Number of rows: 1
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
                       Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1170,6 +1490,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1186,10 +1514,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / NULL FROM DECIMAL_UDF limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / NULL FROM DECIMAL_UDF limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1220,6 +1552,12 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: org.apache.hadoop.hive.ql.metadata.HiveException: Could not instantiate DoubleColDivideDoubleScalar with arguments arguments: [0, ConstantVectorExpression(val null) -> 1:double, 2], argument classes: [Integer, ConstantVectorExpression, Integer], exception: java.lang.IllegalArgumentException stack trace: sun.reflect.GeneratedConstructorAccessor.newInstance(Unknown Source), sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45), java.lang.reflect.Constructor.newInstance(Constructor.java:526), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.instantiateExpression(VectorizationContext.java:1335), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.createVectorExpression(VectorizationContext.java:1233), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressionForUdf(VectorizationContext.java:1198), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUdfVectorExpression(VectorizationContext.java:1400), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:542), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2102), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2092), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1812), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1647), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1282), org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), ...
+ vectorized: false Stage: Stage-0 Fetch Operator @@ -1236,10 +1574,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1254,15 +1596,30 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColNotEqualDecimalScalar(col 0, val 0) -> boolean predicate: (key <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / key) (type: decimal(38,24)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColDivideDecimalColumn(col 0, col 0) -> 2:decimal(38,24) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1270,6 +1627,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1319,10 +1684,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000000 1.000000000000000000000000 1.000000000000000000000000 -PREHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1337,15 +1706,30 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + 
native: true + predicateExpression: FilterLongColNotEqualLongScalar(col 1, val 0) -> boolean predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DecimalColDivideDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,21) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1353,6 +1737,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1392,10 +1784,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000 1.000000000100000000000 1.000000000099999992710 -PREHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1410,15 +1806,30 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColNotEqualLongScalar(col 1, val 0) -> boolean predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColDivideDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1426,6 +1837,14 @@ STAGE 
PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1465,10 +1884,14 @@ POSTHOOK: Input: default@decimal_udf 2.0 2.0000000002 2.0000000002 -PREHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1483,12 +1906,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (1.0 + (UDFToDouble(key) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DoubleScalarAddDoubleColumn(val 1.0, col 3)(children: DoubleColDivideDoubleScalar(col 2, val 2.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double) -> 2:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1496,6 +1930,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1550,11 +1992,15 @@ NULL -6.172839440617284E8 6.172839460617284E8 PREHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1569,12 +2015,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: abs(key) (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncAbsDecimalToDecimal(col 0) -> 2:decimal(20,10) Statistics: 
Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1582,6 +2039,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1636,11 +2101,15 @@ NULL 1234567890.1234567890 1234567890.1234567800 PREHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY POSTHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1659,12 +2128,27 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(key), count(key), avg(key) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0) -> decimal(38,18), VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1677,8 +2161,21 @@ STAGE PLANS: value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false 
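A note on the fallback recorded just above: aggregates such as sum(), count(), min() and max() (see the plans later in this file) keep the GROUP BY fully vectorized because their partial results are primitive values, whereas avg() here (and stddev()/variance() further down) carries a struct of count/sum as its partial result. The map-side operator therefore reports vectorOutputConditionsNotMet, and the reduce side drops back to row mode with the notVectorizedReason shown. A minimal model of that gate, under assumed names (AggSpec and canEmitVectorOutput are illustrative, not Hive classes):

    import java.util.List;

    public class VectorOutputGate {

      // Hypothetical description of one aggregate's intermediate (partial) type.
      record AggSpec(String udfName, String intermediateType) {
        boolean hasPrimitiveOutput() {
          // struct<...> intermediates (avg, stddev, variance) are not primitive.
          return !intermediateType.startsWith("struct");
        }
      }

      // Mirrors the rule in the plan: every aggregator must produce a primitive
      // partial result, otherwise the group-by cannot emit vectorized output.
      static boolean canEmitVectorOutput(List<AggSpec> aggs) {
        return aggs.stream().allMatch(AggSpec::hasPrimitiveOutput);
      }

      public static void main(String[] args) {
        List<AggSpec> aggs = List.of(
            new AggSpec("sum", "decimal(30,10)"),
            new AggSpec("count", "bigint"),
            new AggSpec("avg", "struct"));             // this one blocks vector output
        System.out.println(canEmitVectorOutput(aggs)); // prints: false
      }
    }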
Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) @@ -1697,13 +2194,27 @@ STAGE PLANS: value expressions: _col1 (type: decimal(38,23)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,23)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1742,11 +2253,15 @@ POSTHOOK: Input: default@decimal_udf 4400 -4400.00000000000000000000000 -4400.00000000000000 -4400.0000000000 1234567890 1234567890.12345678000000000000000 1234567890.12345678000000 1234567890.1234567800 PREHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1761,12 +2276,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncNegateDecimalToDecimal(col 0) -> 2:decimal(20,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1774,6 +2300,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1828,25 +2362,66 @@ NULL 1234567890.1234567890 -1234567890.1234567800 PREHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF 
+EXPLAIN VECTORIZATION EXPRESSION SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: decimal_udf - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1981,11 +2556,15 @@ NULL -1234567890 1234567891 PREHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2000,12 +2579,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncFloorDecimalToDecimal(col 0) -> 2:decimal(11,0) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2013,6 +2603,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -2067,11 +2665,15 @@ NULL -1234567891 1234567890 PREHOOK: query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2086,12 +2688,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 2:decimal(13,2) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2099,6 +2712,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -2153,11 +2774,15 @@ NULL -1234567890.12 1234567890.12 PREHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2172,12 +2797,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: VectorUDFAdaptor(power(key, 2)) -> 2:Double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2185,6 +2821,14 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -2239,11 +2883,15 @@ NULL 1.52415787532388352E18 1.52415787532388352E18 PREHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2258,12 +2906,23 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ((key + 1) % (key / 2)) (type: decimal(28,18)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColModuloDecimalColumn(col 2, col 3)(children: DecimalColAddDecimalScalar(col 0, val 1) -> 2:decimal(21,10), DecimalColDivideDecimalScalar(col 0, val 2) -> 3:decimal(28,18)) -> 4:decimal(28,18) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2271,6 +2930,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -2325,11 +2992,15 @@ NULL -617283944.061728394500000000 1.000000000000000000 PREHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2347,12 +3018,27 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) 
outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev(key), variance(key) + Group By Vectorization: + aggregators: VectorUDAFStdPopDecimal(col 0) -> struct, VectorUDAFVarPopDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2365,8 +3051,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev(VALUE._col0), variance(VALUE._col1) @@ -2414,11 +3113,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2436,12 +3139,27 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(key), var_samp(key) + Group By Vectorization: + aggregators: VectorUDAFStdSampDecimal(col 0) -> struct, VectorUDAFVarSampDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of 
VectorUDAFVarSampDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2454,8 +3172,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) @@ -2503,11 +3234,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2540,8 +3275,19 @@ STAGE PLANS: value expressions: _col0 (type: array) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: histogram_numeric(VALUE._col0) @@ -2572,11 +3318,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### [{"x":-1.2345678901234567E9,"y":1.0},{"x":-144.50057142857142,"y":35.0},{"x":1.2345678901234567E9,"y":1.0}] PREHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2594,31 +3344,73 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) 
outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2641,11 +3433,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1234567890.1234567890 PREHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2663,31 +3459,73 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By 
Operator aggregations: max(key) + Group By Vectorization: + aggregators: VectorUDAFMaxDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2710,11 +3548,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 1234567890.1234567800 PREHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2732,31 +3574,73 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(key) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] 
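By contrast, the min/max/count plans in this stretch stay vectorized on both the map and reduce side (vectorOutput: true throughout), since a decimal max or a row count needs no struct intermediate. Conceptually, an aggregator in the spirit of VectorUDAFMaxDecimal folds an entire column batch into one running value, honoring the selection vector left behind by any upstream filter. The loop below is a simplified sketch with assumed names, not Hive's actual implementation:

    import java.math.BigDecimal;

    public class VectorMaxSketch {

      // Fold one batch of a decimal column into the running maximum.
      // selected/selectedInUse model the selection vector that vectorized
      // operators consult instead of compacting the batch after a filter.
      static BigDecimal maxOfBatch(BigDecimal[] vector, int size,
                                   int[] selected, boolean selectedInUse,
                                   BigDecimal runningMax) {
        for (int j = 0; j < size; j++) {
          int i = selectedInUse ? selected[j] : j;
          BigDecimal v = vector[i];
          if (v != null && (runningMax == null || v.compareTo(runningMax) > 0)) {
            runningMax = v;
          }
        }
        return runningMax;
      }

      public static void main(String[] args) {
        BigDecimal[] batch = {
            new BigDecimal("-4400"), new BigDecimal("1234567890.12345678") };
        System.out.println(maxOfBatch(batch, 2, null, false, null));
      }
    }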
mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out index 5ea9f4d..90f3371 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -70,15 +74,30 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8] + selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -86,6 +105,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -104,20 +131,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -132,15 +163,30 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 
3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:Double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -148,6 +194,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 188c624..c63c486 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,11 +132,24 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -141,13 +158,38 
@@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -155,9 +197,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_elt.q.out ql/src/test/results/clientpositive/llap/vector_elt.q.out index bb66867..44ba6de 100644 --- ql/src/test/results/clientpositive/llap/vector_elt.q.out +++ ql/src/test/results/clientpositive/llap/vector_elt.q.out @@ -1,29 +1,80 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 935842 Basic stats: 
COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean + predicate: (ctinyint > 0) (type: boolean) + Statistics: Num rows: 4096 Data size: 312018 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string + Statistics: Num rows: 4096 Data size: 1069830 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -47,7 +98,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +111,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,22 +124,68 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY 
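Two different outcomes for elt() are visible here. The query above has real column arguments, so it compiles to a native VectorElt expression (with a CastLongToString child for the int column). The query whose plan follows has only literal arguments, so the optimizer folds every elt() call at compile time and the vectorizer merely emits a ConstantVectorExpression per output column. Filling a constant column is nearly free because the column is marked repeating; the fragment below models that idea with made-up class names rather than Hive's ColumnVector API:

    public class ConstantColumnSketch {
      static final int BATCH_SIZE = 1024;

      // Toy stand-in for a vectorized string column.
      static class StringColumn {
        String[] vector = new String[BATCH_SIZE];
        boolean isRepeating;   // true => vector[0] stands in for every row
      }

      // What a constant expression does per batch: O(1) work, no per-row loop.
      static void evaluateConstant(StringColumn out, String constant) {
        out.isRepeating = true;
        out.vector[0] = constant;  // e.g. 'defg', the folded elt(2,'abc','defg')
      }

      public static void main(String[] args) {
        StringColumn col = new StringColumn();
        evaluateConstant(col, "defg");
        System.out.println(col.vector[0] + " repeating=" + col.isRepeating);
      }
    }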
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Limit - Number of rows: 1 - ListSink + ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index 1e24e81..258c00c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC 
POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -50,21 +54,55 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 0, start 0, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -73,17 +111,37 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 758f70c..9ed40df 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -50,21 +54,55 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 1, start 4, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -73,17 +111,37 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index fb5dfe6..c97863a 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,12 +132,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: 
t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -142,15 +160,41 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -158,9 +202,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index ff658d7..9a09b89 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. 
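The hunks above show both ReduceSink outcomes under the new EXPLAIN VECTORIZATION output: when the first-stage shuffle for SELECT DISTINCT partitions on rand(), the plan reports nativeConditionsNotMet: Uniform Hash IS false and falls back to the generic VectorReduceSinkOperator (native: false), while a stage partitioned on the group-by key itself gets a specialized native operator such as VectorReduceSinkStringOperator or VectorReduceSinkMultiKeyOperator. As a minimal sketch of how a test utility might pull those condition lines out of the explain text — assuming the plan is available as a plain string; the class and method names here are hypothetical, not part of Hive:

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Hypothetical helper that extracts "nativeConditionsNotMet" entries from
// EXPLAIN VECTORIZATION text, so a test could assert *why* a ReduceSink fell
// back to the non-native VectorReduceSinkOperator. Illustrative only; Hive's
// own q.out tests simply diff the full plan text.
public class NativeConditionChecker {
  private static final Pattern NOT_MET =
      Pattern.compile("nativeConditionsNotMet:\\s*(.+)");

  public static List<String> conditionsNotMet(String explainOutput) {
    List<String> result = new ArrayList<>();
    Matcher m = NOT_MET.matcher(explainOutput);
    while (m.find()) {
      // Each line holds a comma-separated list of failed conditions.
      for (String cond : m.group(1).split(",\\s*")) {
        result.add(cond.trim());
      }
    }
    return result;
  }

  public static void main(String[] args) {
    String snippet =
        "Reduce Sink Vectorization:\n" +
        "    className: VectorReduceSinkOperator\n" +
        "    native: false\n" +
        "    nativeConditionsNotMet: Uniform Hash IS false\n";
    // Prints [Uniform Hash IS false]: a rand() partition key defeats the
    // uniform-hash requirement of the specialized native ReduceSink operators.
    System.out.println(conditionsNotMet(snippet));
  }
}
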
-explain +explain vectorization expression select * from src where not key in @@ -8,65 +8,199 @@ where not key in order by key PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization expression select * from src where not key in (select key from src) order by key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_36] - Select Operator [SEL_35] (rows=1 width=178) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - Select Operator [SEL_20] (rows=1 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=1 width=265) - predicate:_col3 is null - Map Join Operator [MAPJOIN_28] (rows=1219 width=265) - Conds:MAPJOIN_27._col0=RS_17._col0(Left Outer),Output:["_col0","_col1","_col3"] - <-Map 5 [BROADCAST_EDGE] llap - BROADCAST [RS_17] - PartitionCols:_col0 - Select Operator [SEL_12] (rows=500 width=87) - Output:["_col0"] - TableScan [TS_11] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map Join Operator [MAPJOIN_27] (rows=500 width=178) - Conds:(Inner),Output:["_col0","_col1"] - <-Reducer 4 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_34] - Select Operator [SEL_33] (rows=1 width=8) - Filter Operator [FIL_32] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_31] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_4] (rows=1 width=87) - Filter Operator [FIL_25] (rows=1 width=87) - predicate:key is null - TableScan [TS_2] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1219 Data size: 323035 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + 
predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is null (type: boolean) + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 0, val 0) -> boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: select * diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index c4bcbab..8599e97 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,11 +248,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -257,23 +274,55 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -324,7 +373,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -336,7 +385,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -348,6 +397,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -366,11 +419,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -379,19 +445,51 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 @@ -399,20 +497,43 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -533,7 +654,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -545,7 +666,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -557,6 +678,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -575,12 +700,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), 
ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -589,15 +728,42 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 2) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -605,9 +771,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -615,17 +792,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -746,7 +942,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -758,7 +954,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -770,6 +966,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -788,12 +988,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -802,15 +1016,41 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 2) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -818,9 +1058,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -828,17 +1079,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git 
ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 8e55ce3..ef3073c 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -127,16 +127,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,8 +174,19 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: Pruning grouping set id not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -212,16 +227,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -255,10 +274,29 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -266,9 +304,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 555340d..45cf8e6 100644 --- ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -21,29 +25,68 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 4587 Data size: 13704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 12] + selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index e939c67..d7fa1c9 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -181,16 +181,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[13][bigTable=store_sales] in task 'Map 2' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -209,53 +213,120 @@ STAGE PLANS: TableScan alias: customer_demographics Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE value expressions: cd_demo_sk (type: int), cd_marital_status (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Not empty key IS false outputColumnNames: _col0, _col2, _col16 input vertices: 0 Map 1 Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val M) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val U) -> boolean) -> boolean) -> boolean predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 3:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index ca07200..d9e701a 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,12 +59,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -68,6 +83,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast 
Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col1 input vertices: 1 Map 2 @@ -75,9 +97,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -85,25 +114,66 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -122,12 +192,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: 
type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,12 +219,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -158,12 +243,23 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -171,19 +267,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -192,9 +318,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + 
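The VectorReduceSinkLongOperator entries in these plans report a checklist of preconditions ("No buckets IS true", "No TopN IS true", "Uniform Hash IS true", and so on). As a rough illustration of that met/not-met bookkeeping, here is a minimal Java sketch; the class and method names are hypothetical, not Hive's actual planner API.

import java.util.ArrayList;
import java.util.List;

public class NativeConditionChecklist {
  private final List<String> met = new ArrayList<>();
  private final List<String> notMet = new ArrayList<>();

  // Record a named precondition, e.g. check("Uniform Hash", isUniformHash).
  public void check(String name, boolean value) {
    (value ? met : notMet).add(name + " IS " + value);
  }

  // The operator vectorizes natively only when every precondition held.
  public boolean allMet() {
    return notMet.isEmpty();
  }

  public List<String> conditionsMet()    { return met; }
  public List<String> conditionsNotMet() { return notMet; }
}

Under this model, the non-native reduce-sink cases later in this diff (which print "nativeConditionsNotMet: Uniform Hash IS false" and fall back to the row-mode VectorReduceSinkOperator) are simply checklists where allMet() returned false.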
Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -249,12 +392,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,12 +419,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -285,6 +443,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -292,9 +458,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -302,26 +475,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -340,12 +555,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -363,32 +582,71 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -396,12 +654,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 0, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 1 Data size: 
100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -409,6 +679,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Stage: Stage-0 Fetch Operator @@ -427,12 +711,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -450,12 +738,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -463,6 +762,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -470,9 +778,17 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 1] + selectExpressions: LongColMultiplyLongScalar(col 0, 
val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -480,26 +796,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String, bigint, bigint Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -518,12 +876,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -541,12 +903,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -554,6 +927,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -561,9 +943,16 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -571,26 +960,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE 
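Several operators in these plans carry "predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean". Conceptually, such a filter scans one column vector per batch and compacts the batch's selection vector in place. The following is a simplified sketch of that inner loop under assumed, pared-down types (a long[] column plus an int[] selection vector); it is not the generated Hive class itself, which additionally handles nulls and repeating vectors.

public class FilterLongColGreaterLongScalarSketch {

  // Keeps only rows where vector[i] > scalar by rewriting the selection
  // vector in place and returning the new batch size.
  public static int filter(long[] vector, int[] selected, boolean selectedInUse,
                           int size, long scalar) {
    int newSize = 0;
    if (selectedInUse) {
      // A prior filter already populated 'selected'; compact it further.
      for (int j = 0; j < size; j++) {
        int i = selected[j];
        if (vector[i] > scalar) {
          selected[newSize++] = i;
        }
      }
    } else {
      // First filter in the chain: populate 'selected' from scratch.
      for (int i = 0; i < size; i++) {
        if (vector[i] > scalar) {
          selected[newSize++] = i;
        }
      }
    }
    // Caller then sets batch.size = newSize and selectedInUse = true.
    return newSize;
  }
}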
Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -609,12 +1040,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -632,12 +1067,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -645,6 +1091,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -652,9 +1107,16 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -662,26 +1124,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 2 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -700,12 +1204,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -723,32 +1231,71 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -756,6 +1303,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 @@ -763,9 +1319,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: 
string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -773,6 +1336,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Stage: Stage-0 Fetch Operator @@ -791,12 +1368,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -814,32 +1395,71 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -847,6 +1467,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 @@ -854,9 +1483,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -864,6 +1500,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_interval_1.q.out ql/src/test/results/clientpositive/llap/vector_interval_1.q.out index d8003ba..e7d1963 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_1.q.out @@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] PREHOOK: query: -- constants/cast from string -explain +explain vectorization expression 
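In the vector_interval_1 plans that follow, interval '1-2' year to month materializes as "ConstantVectorExpression(val 14) -> long", and the constant-folded sum interval '2-4' as val 28. That is because a year-month interval vectorizes as a single long holding total months: 1 year 2 months = 1 * 12 + 2 = 14. A tiny sketch of that encoding, where the parser is a hypothetical stand-in for CastStringToIntervalYearMonth:

public class IntervalYearMonthSketch {

  // Parses "Y-M" into total months, e.g. "1-2" -> 14. Sign and error
  // handling are omitted for brevity.
  public static long toMonths(String yearToMonth) {
    String[] parts = yearToMonth.split("-");
    return Long.parseLong(parts[0]) * 12 + Long.parseLong(parts[1]);
  }

  public static void main(String[] args) {
    long a = toMonths("1-2");          // 14, the val in the plan
    System.out.println(a + a);         // 28, i.e. the interval 2-4
  }
}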
select str1, interval '1-2' year to month, interval_year_month(str1), @@ -47,13 +47,17 @@ select from vector_interval_1 order by str1 PREHOOK: type: QUERY POSTHOOK: query: -- constants/cast from string -explain +explain vectorization expression select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,26 +75,62 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) outputColumnNames: _col0, _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 4, 5] + selectExpressions: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 4, 2] + selectExpressions: ConstantVectorExpression(val 14) -> 3:long, ConstantVectorExpression(val 1 02:03:04.000000000) -> 4:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -122,7 +162,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: -- interval arithmetic -explain +explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -134,7 +174,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- interval arithmetic -explain +explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -145,6 +185,10 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,26 +206,62 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4] + selectExpressions: ConstantVectorExpression(val 28) -> 5:long, ConstantVectorExpression(val 0) -> 6:long Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -220,7 +300,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -231,7 +311,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -242,6 +322,10 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -259,26 +343,62 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 
5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
               key expressions: _col0 (type: date)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: Uniform Hash IS false
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
         Execution mode: vectorized, llap
         LLAP IO: all inputs
+        Map Vectorization:
+            enabled: true
+            enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+            groupByVectorOutput: true
+            inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            allNative: false
+            usesVectorUDFAdaptor: false
+            vectorized: true
     Reducer 2
         Execution mode: vectorized, llap
+        Reduce Vectorization:
+            enabled: true
+            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+            groupByVectorOutput: true
+            allNative: false
+            usesVectorUDFAdaptor: false
+            vectorized: true
         Reduce Operator Tree:
           Select Operator
             expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+                selectExpressions: ConstantVectorExpression(val 2 04:06:08.000000000) -> 5:interval_day_time, ConstantVectorExpression(val 0 00:00:00.000000000) -> 6:interval_day_time
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -318,7 +438,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL
 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -336,7 +456,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -353,6 +473,10 @@ select
   dt - interval_day_time(str2)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -370,26 +494,61 @@ STAGE PLANS:
         TableScan
           alias: vector_interval_1
           Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3]
           Select Operator
             expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [1, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+                selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 7:long, IntervalYearMonthColAddDateColumn(col 5, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 9:long, DateColSubtractIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
               key expressions: _col0 (type: date)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: Uniform Hash IS false
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
         Execution mode: vectorized, llap
         LLAP IO: all inputs
+        Map Vectorization:
+            enabled: true
+            enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+            groupByVectorOutput: true
+            inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            allNative: false
+            usesVectorUDFAdaptor: false
+            vectorized: true
     Reducer 2
         Execution mode: vectorized, llap
+        Reduce Vectorization:
+            enabled: true
+            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+            groupByVectorOutput: true
+            allNative: false
+            usesVectorUDFAdaptor: false
+            vectorized: true
         Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
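The date-interval plan above shows the operand-shape-to-class mapping directly: interval literals bind to the *Scalar variants (e.g. DateColAddIntervalYearMonthScalar) while casted string columns bind to the *Column variants (e.g. DateColAddIntervalYearMonthColumn). A minimal sketch of the query shape being exercised, assuming the vector_interval_1 table created earlier in this test (comments mark the vector classes the plan above selects; the exact literals are illustrative):

  explain vectorization expression
  select
    dt,
    dt + interval '1-2' year to month,          -- DateColAddIntervalYearMonthScalar
    dt + cast(str1 as interval year to month),  -- DateColAddIntervalYearMonthColumn
    dt - interval '1 2:3:4' day to second       -- DateColSubtractIntervalDayTimeScalar
  from vector_interval_1 order by dt;

Per the reference rows above, 2001-01-01 plus 1-2 evaluates to 2002-03-01, and minus 1-2 to 1999-11-01.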
@@ -441,7 +600,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56
 PREHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -459,7 +618,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -476,6 +635,10 @@ select
   ts - interval_day_time(str2)
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -493,26 +656,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_1
          Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3]
          Select Operator
            expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+               selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) -> 14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: timestamp)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
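The timestamp variants mirror the date plan one-for-one, swapping TimestampCol* classes for DateCol* and keeping the same scalar/column split. A minimal sketch of the pattern, again assuming the vector_interval_1 test table:

  explain vectorization expression
  select
    ts,
    ts + interval '1-2' year to month,          -- TimestampColAddIntervalYearMonthScalar
    ts - cast(str2 as interval day to second)   -- TimestampColSubtractIntervalDayTimeColumn
  from vector_interval_1 order by ts;

Per the reference rows above, 2001-01-01 01:02:03 plus 1 02:03:04 evaluates to 2001-01-02 03:05:07.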
@@ -564,7 +762,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59
 PREHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -573,7 +771,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -581,6 +779,10 @@ select
   ts - timestamp '2001-01-01 01:02:03'
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -598,26 +800,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_1
          Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3]
          Select Operator
            expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 4, 5, 6]
+               selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: timestamp)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3]
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
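Timestamp minus timestamp yields an interval_day_time, and the plan above covers all three operand shapes: TimestampColSubtractTimestampColumn, TimestampScalarSubtractTimestampColumn, and TimestampColSubtractTimestampScalar. A minimal sketch of the shape, assuming the same test table:

  explain vectorization expression
  select
    ts,
    ts - ts,                               -- TimestampColSubtractTimestampColumn
    ts - timestamp '2001-01-01 01:02:03'   -- TimestampColSubtractTimestampScalar
  from vector_interval_1 order by ts;

Per the rows above, ts - ts comes back as 0 00:00:00.000000000.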
@@ -651,7 +888,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -660,7 +897,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -668,6 +905,10 @@ select
   dt - date '2001-01-01'
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -685,26 +926,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_1
          Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3]
          Select Operator
            expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [1, 4, 5, 6]
+               selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: date)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3]
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
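Date minus date likewise produces an interval_day_time; note that in this version's output the scratch columns for DateColSubtractDateColumn, DateScalarSubtractDateColumn, and DateColSubtractDateScalar are tagged 4:timestamp, 5:timestamp, 6:timestamp even though the SQL result type is interval_day_time. A minimal sketch of the shape:

  explain vectorization expression
  select
    dt,
    dt - dt,                  -- DateColSubtractDateColumn
    date '2001-01-01' - dt,   -- DateScalarSubtractDateColumn
    dt - date '2001-01-01'    -- DateColSubtractDateScalar
  from vector_interval_1 order by dt;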
@@ -738,7 +1014,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,
@@ -750,7 +1026,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,
@@ -761,6 +1037,10 @@ select
   date '2001-01-01' - ts
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -778,26 +1058,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_1
          Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3]
          Select Operator
            expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9]
+               selectExpressions: TimestampColSubtractDateColumn(col 0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val 2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time, TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) -> 6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) -> 7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01 01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val 2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: date)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
            Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
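Everything below applies the same mechanical change to vector_interval_2.q.out: each plain `explain` becomes `explain vectorization expression`, and the reference output gains a PLAN VECTORIZATION header plus Map/Reduce Vectorization and per-operator *Vectorization blocks. A minimal sketch of the syntax these golden files exercise (EXPRESSION is the detail level used throughout this patch; other levels exist but are not shown here):

  explain vectorization expression
  select str1,
    cast(str1 as interval year to month) = interval '1-2' year to month
  from vector_interval_2 order by str1;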
diff --git ql/src/test/results/clientpositive/llap/vector_interval_2.q.out ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
index 23a977e..61702bd 100644
--- ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization expression
 select str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization expression
 select str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,26 +131,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_2
          Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3, 4, 5]
          Select Operator
            expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+               selectExpressions: LongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 12:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 13:long, LongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 14:long, LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 15:long, IntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 16:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 17:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 18:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 19:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 21:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 22:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 23:long, IntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 24:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 25:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 26:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 27:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 28:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 29:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 30:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 31:long
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
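Worth noting in the plan above: year-month intervals are stored as a month count, so column-to-column comparisons vectorize as plain long comparisons (LongColEqualLongColumn and friends), and the scalar forms show the literal already folded to months, e.g. interval '1-2' year to month appears as val 14 and '1-3' as val 15. A minimal sketch of the pattern, using the interval_year_month() helper from these tests:

  explain vectorization expression
  select
    interval_year_month(str1) = interval '1-2' year to month
    -- plan: IntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)
  from vector_interval_2 order by str1;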
@@ -227,7 +266,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 true true true true true true true true true true true true true true true true true true true true true true true true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select str1,
   -- Should all be false
@@ -253,7 +292,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select str1,
   -- Should all be false
@@ -279,6 +318,10 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -296,26 +339,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_2
          Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3, 4, 5]
          Select Operator
            expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean)
            outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+               selectExpressions: LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 9:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 10:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 11:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 12:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 13:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 14:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 15:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 16:long, LongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 17:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 18:long, LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 19:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 21:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 22:long
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 6, 11, 12, 13, 14, 15, 11]
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -384,7 +462,7 @@ POSTHOOK: Input: default@vector_interval_2
#### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 false false false false false false false false false false false false false false false false false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select str3,
   -- Should all be true
@@ -416,7 +494,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select str3,
   -- Should all be true
@@ -448,6 +526,10 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -465,26 +547,61 @@ STAGE PLANS:
        TableScan
          alias: vector_interval_2
          Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+         TableScan Vectorization:
+             native: true
+             projectedOutputColumns: [0, 1, 2, 3, 4, 5]
          Select Operator
            expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+               selectExpressions: IntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 9:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 10:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 11:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 12:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 13:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 14:long, IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 15:long, IntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 16:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 19:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 22:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 23:long, IntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 24:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 25:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 26:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 27:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 28:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 29:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 30:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 31:long
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkOperator
+                 native: false
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                 nativeConditionsNotMet: Uniform Hash IS false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
        Execution mode: vectorized, llap
        LLAP IO: all inputs
+       Map Vectorization:
+           enabled: true
+           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+           groupByVectorOutput: true
+           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
    Reducer 2
        Execution mode: vectorized, llap
+       Reduce Vectorization:
+           enabled: true
+           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+           groupByVectorOutput: true
+           allNative: false
+           usesVectorUDFAdaptor: false
+           vectorized: true
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+           Select Vectorization:
+               className: VectorSelectOperator
+               native: true
+               projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
            Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+             File Sink Vectorization:
+                 className: VectorFileSinkOperator
+                 native: false
              Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
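In contrast to the year-month case, day-time interval comparisons keep dedicated IntervalDayTimeCol*/IntervalDayTimeScalar* classes, and the scalar operand is printed in full, e.g. val 1 02:03:04.000000000, rather than being folded to a single integer. A minimal sketch, using the interval_day_time() helper from these tests:

  explain vectorization expression
  select
    interval_day_time(str3) <= interval '1 2:3:4' day to second
    -- plan: IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)
  from vector_interval_2 order by str3;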
SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean) outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + selectExpressions: IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 9:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 10:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 11:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 12:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 13:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 14:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 15:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 16:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 19:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 22:long Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 6, 11, 12, 13, 14, 15, 11] Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -723,7 +879,7 @@ POSTHOOK: Input: default@vector_interval_2 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 2:3:4 false false false false false false false false false false false false false false false false false false PREHOOK: query: -- interval expressions in predicates -explain +explain vectorization expression select ts from vector_interval_2 where interval_year_month(str1) = interval_year_month(str1) @@ -749,7 +905,7 @@ where order by ts PREHOOK: type: QUERY POSTHOOK: query: -- interval expressions in predicates -explain +explain vectorization expression select ts from vector_interval_2 where interval_year_month(str1) = interval_year_month(str1) @@ -774,6 +930,10 @@ where and interval '1-3' year to month > interval_year_month(str1) order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -791,28 +951,66 @@ STAGE PLANS: 
TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterIntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean) -> boolean predicate: ((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO 
MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -878,7 +1076,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where interval_day_time(str3) = interval_day_time(str3) @@ -903,7 +1101,7 @@ where and interval '1 2:3:5' day to second > interval_day_time(str3) order by ts 
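The query just listed is the day-to-second predicate test. As a reading aid, here is a minimal sketch (not part of the patch) that reproduces its vectorized plan: it assumes the vector_interval_2 table (ts timestamp, dt date, str1/str2/str3/str4 string) created earlier in this q-file, uses the settings the plan output itself reports as met, and shows only three of the query's eighteen conjuncts.

set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;
explain vectorization expression
select ts from vector_interval_2
where interval_day_time(str3) = interval_day_time(str3)
  and interval_day_time(str3) != interval_day_time(str4)
  and interval '1 2:3:4' day to second = interval_day_time(str3)
order by ts;

The resulting Filter Operator should report the same FilterIntervalDayTimeCol*/Scalar* expressions shown in the plan below, with CastStringToIntervalDayTime children over columns 4 and 5.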
PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where interval_day_time(str3) = interval_day_time(str3) @@ -928,6 +1126,10 @@ where and interval '1 2:3:5' day to second > interval_day_time(str3) order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -945,28 +1147,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterIntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 
6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean) -> boolean predicate: ((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator 
expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1032,7 +1272,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -1052,7 +1292,7 @@ where and dt != dt + interval '1-2' year to month order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -1072,6 +1312,10 @@ where and dt != dt + interval '1-2' year to month order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1089,28 +1333,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColLessEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 7)(children: 
DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColLessEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean) -> boolean predicate: ((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 = (dt + 1-2)) and (2002-03-01 <= (dt + 1-2)) and (2002-03-01 >= (dt + 1-2)) and ((dt + 1-2) = 2002-03-01) and ((dt + 1-2) <= 2002-03-01) and ((dt + 1-2) >= 2002-03-01) and (dt <> (dt + 1-2))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1166,7 +1448,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 
01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1191,7 +1473,7 @@ where and ts > ts - interval '1' year order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1216,6 +1498,10 @@ where and ts > ts - interval '1' year order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1233,28 +1519,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2002-02-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2002-02-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 0-0) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: 
TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean) -> boolean predicate: ((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2)) and (2002-03-01 01:02:03.0 >= (ts + 1-2)) and (2002-04-01 01:02:03.0 <> (ts + 1-2)) and (2002-02-01 01:02:03.0 < (ts + 1-2)) and (2002-04-01 01:02:03.0 > (ts + 1-2)) and ((ts + 1-2) = 2002-03-01 01:02:03.0) and ((ts + 1-2) >= 2002-03-01 01:02:03.0) and ((ts + 1-2) <= 2002-03-01 01:02:03.0) and ((ts + 1-2) <> 2002-04-01 01:02:03.0) and ((ts + 1-2) > 2002-02-01 01:02:03.0) and ((ts + 1-2) < 2002-04-01 01:02:03.0) and (ts = (ts + 0-0)) and (ts <> (ts + 1-0)) and (ts <= (ts + 1-0)) and (ts < (ts + 1-0)) and (ts >= (ts - 1-0)) and (ts > (ts - 1-0))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1321,7 +1645,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 PREHOOK: query: -- day to second expressions in predicate -explain +explain vectorization 
expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1347,7 +1671,7 @@ where order by ts PREHOOK: type: QUERY POSTHOOK: query: -- day to second expressions in predicate -explain +explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1372,6 +1696,10 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1389,28 +1717,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) 
-> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean) -> boolean predicate: ((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000)) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0) and (ts = (dt + 0 01:02:03.000000000)) and (ts <> (dt + 0 01:02:04.000000000)) and (ts <= (dt + 0 01:02:03.000000000)) and (ts < (dt + 0 01:02:04.000000000)) and (ts >= (dt - 0 01:02:03.000000000)) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1476,7 +1842,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1501,7 +1867,7 @@ where and ts > ts - interval '1' day order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1526,6 +1892,10 @@ where and ts > ts - interval '1' day order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1543,28 +1913,66 @@ STAGE PLANS: TableScan alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, 
val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean) -> boolean predicate: ((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000)) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0) and (ts = (ts + 0 00:00:00.000000000)) and (ts <> (ts + 1 00:00:00.000000000)) and (ts <= (ts + 1 00:00:00.000000000)) and (ts < (ts + 1 00:00:00.000000000)) and (ts >= (ts - 1 00:00:00.000000000)) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out index 13a8b35..ab7a103 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out @@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps) POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] tsval tsval PREHOOK: query: -- interval year-month arithmetic -explain +explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -49,7 +49,7 @@ from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY POSTHOOK: query: -- interval year-month arithmetic -explain +explain vectorization expression select dateval, dateval - interval '2-2' year to month, @@ -62,6 +62,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -79,26 +83,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -188,7 +227,7 @@ dateval c1 c2 c3 c4 c5 c6 9075-06-13 9073-04-13 9077-08-13 9077-08-13 9073-04-13 9073-04-13 9077-08-13 9209-11-11 9207-09-11 9212-01-11 9212-01-11 9207-09-11 9207-09-11 9212-01-11 9403-01-09 9400-11-09 9405-03-09 9405-03-09 9400-11-09 9400-11-09 9405-03-09 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -197,7 +236,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, dateval - date '1999-06-07', @@ -207,6 +246,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -224,26 +267,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4] + selectExpressions: DateColSubtractDateScalar(col 0, val 1999-06-07 00:00:00.0) -> 2:timestamp, DateScalarSubtractDateColumn(val 1999-06-07 00:00:00.0, col 0) -> 3:timestamp, DateColSubtractDateColumn(col 0, col 0) -> 4:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink 
Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -327,7 +405,7 @@ dateval c1 c2 c3 9075-06-13 2584462 00:00:00.000000000 -2584462 00:00:00.000000000 0 00:00:00.000000000 9209-11-11 2633556 01:00:00.000000000 -2633556 01:00:00.000000000 0 00:00:00.000000000 9403-01-09 2704106 01:00:00.000000000 -2704106 01:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -339,7 +417,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '2-2' year to month, @@ -352,6 +430,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -369,26 +451,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 
6, 7] + selectExpressions: TimestampColSubtractIntervalYearMonthScalar(col 1, val 2-2) -> 2:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 1, val -2-2) -> 3:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val 2-2) -> 4:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val -2-2) -> 5:timestamp, IntervalYearMonthScalarAddTimestampColumn(val -2-2, col 1) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 2-2, col 1) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -478,7 +595,7 @@ tsval c1 c2 c3 c4 c5 c6 9075-06-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9209-11-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9212-01-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9403-01-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 9405-03-09 18:12:33.547 9400-11-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' 
year to month - interval '3-3' year to month @@ -486,7 +603,7 @@ from interval_arithmetic_1 order by interval '2-2' year to month + interval '3-3' year to month limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -495,6 +612,10 @@ order by interval '2-2' year to month + interval '3-3' year to month limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -512,26 +633,64 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: ConstantVectorExpression(val 65) -> 0:long, ConstantVectorExpression(val -13) -> 1:long Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -566,7 +725,7 @@ c0 c1 5-5 -1-1 5-5 -1-1 PREHOOK: query: -- interval day-time arithmetic -explain +explain vectorization expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -579,7 +738,7 @@ from interval_arithmetic_1 order by dateval PREHOOK: type: 
QUERY POSTHOOK: query: -- interval day-time arithmetic -explain +explain vectorization expression select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -592,6 +751,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -609,26 +772,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7] + selectExpressions: DateColSubtractIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 2:timestamp, DateColSubtractIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 3:timestamp, DateColAddIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 4:timestamp, DateColAddIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddDateColumn(val -99 11:22:33.123456789, col 0) -> 6:timestamp, IntervalDayTimeScalarAddDateColumn(val 99 11:22:33.123456789, col 0) -> 7:timestamp Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -718,7 +916,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9075-09-20 11:22:33.123456789 9075-03-05 11:37:26.876543211 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9209-11-11 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9210-02-18 11:22:33.123456789 9209-08-03 13:37:26.876543211 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9403-01-09 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 9403-04-18 12:22:33.123456789 9402-10-01 13:37:26.876543211 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dateval, tsval, @@ -728,7 +926,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dateval, tsval, @@ -739,6 +937,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -756,26 +958,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + selectExpressions: DateColSubtractTimestampColumn(col 0, col 1) -> 2:interval_day_time, TimestampColSubtractDateColumn(col 1, col 0) -> 3:interval_day_time, TimestampColSubtractTimestampColumn(col 1, col 1) -> 4:interval_day_time Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -861,7 +1098,7 @@ dateval tsval c2 c3 c4 9075-06-13 9075-06-13 16:20:09.218517797 -0 16:20:09.218517797 0 16:20:09.218517797 0 00:00:00.000000000 9209-11-11 9209-11-11 04:08:58.223768453 -0 04:08:58.223768453 0 04:08:58.223768453 0 00:00:00.000000000 9403-01-09 9403-01-09 18:12:33.547 -0 18:12:33.547000000 0 18:12:33.547000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -873,7 +1110,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -886,6 +1123,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -903,26 +1144,61 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7] + selectExpressions: TimestampColSubtractIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 2:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 3:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 4:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddTimestampColumn(val -99 11:22:33.123456789, col 1) -> 6:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 99 11:22:33.123456789, col 1) -> 7:timestamp 
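A note for readers of these golden files: the ConstantVectorExpression values encode folded interval literals. Year-to-month intervals are stored as a total month count in a long column, so interval '2-2' + interval '3-3' (i.e. 5-5) prints as val 65 (5 * 12 + 5) and interval '2-2' - interval '3-3' (i.e. -1-1) as val -13 in the plan shown above, while folded day-to-second constants print in full, e.g. val 109 20:30:40.246913578 for the sum of the two day-to-second literals a little further on. A minimal HiveQL sketch for reproducing this style of plan, assuming a session where the properties named in the enabledConditionsMet lists above are set:

  -- sketch only; property names are taken verbatim from the plan conditions above
  set hive.vectorized.execution.enabled=true;
  set hive.vectorized.execution.reduce.enabled=true;
  explain vectorization expression
  select interval '2-2' year to month + interval '3-3' year to month
  from interval_arithmetic_1;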
Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1012,14 +1288,14 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9075-09-21 03:42:42.341974586 9075-03-06 03:57:36.095061008 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9209-11-11 04:08:58.223768453 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9210-02-18 15:31:31.347225242 9209-08-03 17:46:25.100311664 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9403-01-09 18:12:33.547 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 9403-04-19 06:35:06.670456789 9402-10-02 07:50:00.423543211 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second from interval_arithmetic_1 limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' 
day to second @@ -1027,6 +1303,10 @@ from interval_arithmetic_1 limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1041,15 +1321,29 @@ STAGE PLANS: TableScan alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: 109 20:30:40.246913578 (type: interval_day_time), 89 02:14:26.000000000 (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + selectExpressions: ConstantVectorExpression(val 109 20:30:40.246913578) -> 2:interval_day_time, ConstantVectorExpression(val 89 02:14:26.000000000) -> 3:interval_day_time Statistics: Num rows: 50 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1057,6 +1351,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 0bc0e4c..002d011 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -158,7 +158,7 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -180,6 +180,10 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -197,12 +201,24 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -210,6 +226,10 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -217,9 +237,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 8, 14] Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -227,25 +254,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: vectortab_b_1korc Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: interval_day_time) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: interval_day_time) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index bb6916b..9e591b8 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -14,7 +14,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -22,7 +22,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -30,6 +30,10 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,40 +53,93 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 
(type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -109,14 +166,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By 
Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -124,15 +197,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -161,7 +249,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -169,7 +257,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -177,6 +265,10 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -196,34 +288,79 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE 
IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -250,14 +387,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ 
-265,15 +418,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -302,7 +470,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -310,7 +478,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -318,6 +486,10 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -337,46 +509,106 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -403,14 +635,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -442,7 +690,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -453,7 +701,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -464,6 +712,10 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -484,59 +736,136 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) 
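A note on the recurring notVectorizedReason in these join plans: only the reducer that evaluates sum(hash(...)) falls back to row mode, because the planner reports GenericUDFHash as not supported as a vectorized aggregate input, while every other vertex of the same query still prints vectorized: true; as the Execution mode lines show, vectorization is decided per vertex, not per query. A hedged sketch for surfacing such a reason, reusing the orcsrc table and query shape from the diffs above with the session properties named in the plan conditions:

  -- sketch only; reproduces the reducer fallback reported in the plans above
  set hive.vectorized.execution.enabled=true;
  set hive.vectorized.execution.reduce.enabled=true;
  explain vectorization expression
  FROM (SELECT orcsrc.* FROM orcsrc sort by key) x
  JOIN (SELECT orcsrc.* FROM orcsrc sort by key) Y ON (x.key = Y.key)
  select sum(hash(Y.key, Y.value));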
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -566,14 
+895,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -581,28 +926,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 
500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -636,7 +1011,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -647,7 +1022,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -658,6 +1033,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -679,59 +1058,134 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -757,14 +1211,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + 
projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -772,28 +1242,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -827,7 +1327,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -838,7 +1338,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER 
JOIN @@ -849,6 +1349,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -870,59 +1374,134 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: 
true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -948,14 +1527,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -963,28 +1558,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1018,7 +1643,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1029,7 +1654,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1040,6 +1665,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1061,59 +1690,134 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: 
_col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -1139,14 +1843,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1154,28 +1874,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: 
string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1209,7 +1959,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1220,7 +1970,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1231,6 +1981,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1252,59 +2006,134 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -1330,14 +2159,30 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1345,28 +2190,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out index 16603c7..c21da5f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY Plan optimized by CBO. 
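A note for readers of the regenerated golden files above and below: the new PLAN VECTORIZATION, Map Vectorization, Reduce Vectorization, and per-operator Vectorization blocks are driven by the HiveConf settings that the plans themselves print in their enabledConditionsMet/enabledConditionsNotMet and nativeConditionsMet/nativeConditionsNotMet lines. A minimal HiveQL sketch of a session reproducing the output shapes seen in these files (settings and queries are copied from the plans; this is an illustration, not part of the patch):

-- Full per-expression detail; with hive.vectorized.execution.enabled=false the same
-- statement prints only "enabled: false" plus the failed condition, as in the first
-- two plans of vector_left_outer_join2.q.out.
set hive.vectorized.execution.enabled=true;
explain vectorization expression
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2
from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );

-- Toggling this setting is what switches the same outer map join between
-- VectorMapJoinOuterFilteredOperator (native: false) and
-- VectorMapJoinOuterLongOperator (native: true) in vector_left_outer_join2.q.out.
set hive.vectorized.execution.mapjoin.native.enabled=true;

-- Summary-only form, as exercised in vector_leftsemi_mapjoin.q.out.
explain vectorization only summary
select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;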
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index b9ffa34..25066be 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,6 +72,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -84,6 +96,14 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -100,8 +120,23 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 1163d24..fa35da7 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -80,12 +80,16 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: 
query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,12 +174,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -260,12 +268,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -283,9 +295,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -296,6 +315,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -303,9 +327,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -313,23 +344,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: tjoin2 Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -350,12 +408,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -373,9 +435,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] 
Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -386,6 +455,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -393,9 +467,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -403,23 +484,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: tjoin2 Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -440,12 +548,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( 
tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -463,9 +575,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -476,6 +595,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -483,9 +606,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -493,23 +623,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: tjoin2 Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets 
IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -530,12 +687,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -553,9 +714,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -566,6 +734,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -573,9 +745,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -583,23 +762,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: tjoin2 Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 69911f5..f3ffee8 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -132,91 +132,17 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PREHOOK: query: explain vectorization only summary -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is 
not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -234,91 +160,15 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b 
- Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -338,91 +188,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null 
(type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -434,100 +208,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column 
stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 +PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 #### A masked pattern was here #### POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY @@ -545,91 +239,15 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - 
Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -644,95 +262,15 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - 
Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -747,95 +285,15 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -847,91 +305,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends 
on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -948,91 +330,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - 
Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -1063,215 +369,39 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez +PREHOOK: query: select * from t1 a left semi join t2 b on 
a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by 
a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 
Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -1299,636 +429,93 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 
_col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 val_0 -0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 
key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root 
stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink 
+PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+10
+10
+10
+10
+4
+4
+8
+8
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
@@ -1956,120 +543,27 @@ POSTHOOK: Input: default@t3
 10
 10
 10
-16
-18
-20
 4
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Right Outer Join0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
@@ -2104,107 +598,119 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-NULL
-NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
+PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
 #### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Outer Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+10
+10
+10
+10
+16
+18
+20
+4
+4
+8
+8
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+10
+10
+10
+10
+4
+4
+8
+8
+NULL
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
 PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -2260,113 +766,15 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 value (type: string)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: value (type: string)
-                    sort order: +
-                    Map-reduce partition columns: value (type: string)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-        Reducer 2
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
@@ -2415,83 +823,15 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization only summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > 100) and value is not null) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Semi Join 0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: llap
-            LLAP IO: all inputs
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: string)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: llap
-            LLAP IO: all inputs
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
@@ -2503,10 +843,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2605,10 +951,16 @@ POSTHOOK: Input: default@t2
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2709,10 +1061,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2805,10 +1163,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2916,10 +1280,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3015,10 +1385,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 0	val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3118,10 +1494,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization summary
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3218,10 +1600,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3319,10 +1707,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3434,10 +1828,16 @@ POSTHOOK: Input: default@t3
 8
 8
 9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3534,10 +1934,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 8	val_8
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3670,10 +2076,16 @@ POSTHOOK: Input: default@t3
 10	val_10	10	val_5
 4	val_4	4	val_2
 8	val_8	8	val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization summary
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3780,10 +2192,16 @@ POSTHOOK: Input: default@t3
 5	val_5
 8	val_8
 9	val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3919,10 +2337,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4052,10 +2476,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4193,10 +2623,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4334,10 +2770,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4477,10 +2919,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4631,10 +3079,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4786,10 +3240,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4874,10 +3334,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only operator
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4885,80 +3351,84 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
  Stage: Stage-0
    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -4976,10 +3446,16 @@ POSTHOOK: Input: default@t2
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only operator
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only operator
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4987,80 +3463,84 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
  Stage: Stage-0
    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5080,10 +3560,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only operator
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only operator
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5091,80 +3577,84 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
  Stage: Stage-0
    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5176,10 +3666,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization only operator
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization only operator
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5187,84 +3683,87 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col1 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 15) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col1 (type: int), _col1 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col1 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col1 (type: int)
-                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan Vectorization:
+                    native: true
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -5287,10 +3786,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5298,80 +3803,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 
(SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -5386,10 +3895,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5397,84 +3912,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 
7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -5489,10 +4007,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5500,84 +4024,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan 
Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -5585,95 +4112,105 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5690,10 +4227,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5701,80 +4244,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + 
native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -5805,10 +4352,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5816,80 +4369,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data 
size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5905,10 +4462,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 
c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5916,104 +4479,108 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -6041,10 +4608,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6052,80 +4625,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: 
true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -6151,10 +4728,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6162,107 +4745,112 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6290,10 +4878,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6301,89 +4895,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + 
nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform 
Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6423,10 +5024,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6434,94 +5041,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - 
Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6564,10 +5168,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6575,94 +5185,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No 
TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key 
(type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6705,10 +5312,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6716,94 +5329,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, 
llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6848,105 +5458,108 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key 
(type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -7002,10 +5615,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7013,102 +5632,107 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - 
Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + native: false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + 
enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
             Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: value (type: string)
-                    sort order: +
-                    Map-reduce partition columns: value (type: string)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
 
PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
PREHOOK: type: QUERY
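Every plan above applies the same rule to Reduce Sink vectorization: the sink gets a key-type-specialized native class (VectorReduceSinkLongOperator for a single int key, VectorReduceSinkStringOperator for a single string key, VectorReduceSinkMultiKeyOperator for a composite key) only while every listed native condition holds, and it falls back to the generic VectorReduceSinkOperator with native: false as soon as one fails. In these hunks the failing condition is always Uniform Hash IS false, which shows up on the final sort by sinks that order rows without partition columns. The Java sketch below mirrors that decision for illustration only; ReduceSinkClassSketch, KeyShape, and pickClassName are hypothetical names rather than Hive code.

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    // Illustrative sketch, not Hive's selection logic: map a sink's key shape
    // and its unmet native conditions to the className values printed above.
    public class ReduceSinkClassSketch {

      enum KeyShape { SINGLE_LONG, SINGLE_STRING, MULTI }

      static String pickClassName(KeyShape shape, List<String> unmetConditions) {
        if (!unmetConditions.isEmpty()) {
          // Any unmet condition is printed under nativeConditionsNotMet and
          // the operator stays on the generic class with native: false.
          return "VectorReduceSinkOperator";
        }
        switch (shape) {
          case SINGLE_LONG:   return "VectorReduceSinkLongOperator";
          case SINGLE_STRING: return "VectorReduceSinkStringOperator";
          default:            return "VectorReduceSinkMultiKeyOperator";
        }
      }

      public static void main(String[] args) {
        // Single int key, all conditions met (a partitioned ReduceSink):
        System.out.println(pickClassName(KeyShape.SINGLE_LONG, Collections.emptyList()));
        // Same key, but the sink only orders rows for a sort by:
        System.out.println(pickClassName(KeyShape.SINGLE_LONG, Arrays.asList("Uniform Hash IS false")));
      }
    }

Run as written, it prints VectorReduceSinkLongOperator and then VectorReduceSinkOperator, the same className pair that alternates through the plans around this point.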
@@ -7157,10 +5781,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization only operator
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -7168,72 +5798,68 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > 100) and value is not null) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Semi Join 0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                      Map Join Vectorization:
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2 
             Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: string)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                TableScan Vectorization:
+                    native: true
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -7245,10 +5871,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7267,7 +5899,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7276,6 +5915,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7283,22 +5927,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7307,18 +5986,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7347,10 +6061,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, 
a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7369,7 +6089,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7378,6 +6105,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7385,22 +6117,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + 
className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7409,18 +6176,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7451,10 +6253,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7473,7 +6281,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: 
NONE Map Join Operator @@ -7482,6 +6297,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7489,22 +6309,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7513,18 +6368,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false 
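Throughout these plans the Map Join reports className VectorMapJoinOperator with nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false; that is, the test runs with the native MapJoin path switched off, so the vectorizer falls back to the row-mode pass-through operator. A sketch of the comparison one would run to see the other path (assumption: with the flag on and the remaining native conditions still met, a native VectorMapJoin* class would be reported instead of the fallback):

    SET hive.vectorized.execution.mapjoin.native.enabled=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM t1 a LEFT SEMI JOIN t4 b ON b.key = a.key
    SORT BY a.key, a.value;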
+ usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7547,10 +6437,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7569,7 +6465,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7578,6 +6481,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 3 @@ -7585,26 +6493,65 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: 
+ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 0 + native: false + projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -7613,18 +6560,53 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: 
string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7658,10 +6640,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7680,7 +6668,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7689,6 +6684,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -7696,22 +6696,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b 
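Each of these plans materializes the semijoin's right side through a hash-mode Group By Operator before the Map Join: a LEFT SEMI JOIN only tests key existence, so the build side is deduplicated first, which is why a VectorGroupByOperator with keyExpressions over the join key appears on the small-table branch. A hand-written equivalent of the plan above (a sketch of the semantics, not the planner's literal rewrite):

    SELECT a.value
    FROM t1 a
    JOIN (SELECT DISTINCT key FROM t3 WHERE key < 15) b
      ON a.key = b.key
    SORT BY a.value;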
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -7720,18 +6755,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7757,10 +6827,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key 
from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7779,14 +6855,31 @@ STAGE PLANS: TableScan alias: t3 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean predicate: (key > 5) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7795,15 +6888,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7812,6 +6929,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table 
vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 1 @@ -7819,22 +6941,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7860,10 +7022,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7882,14 +7050,31 @@ STAGE PLANS: TableScan alias: t2 
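Two ReduceSink flavours recur in these plans: the shuffle that partitions on the single integer join key is reported as the native VectorReduceSinkLongOperator, while the final SORT BY shuffle carries no partition columns, fails the Uniform Hash check, and falls back to the generic VectorReduceSinkOperator (native: false). A sketch of two statements that should exercise the two cases (the expected class names are an inference from the conditions printed above, not output reproduced from this test):

    EXPLAIN VECTORIZATION DETAIL
    SELECT key FROM t2 GROUP BY key;       -- key-partitioned shuffle: native sink expected

    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM t2 SORT BY key, value;   -- SORT BY shuffle: "Uniform Hash IS false" fallback expected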
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -7898,15 +7083,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7915,6 +7124,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col1 input vertices: 1 Map 1 @@ -7922,22 +7136,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7960,10 +7214,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7982,14 +7242,31 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (key > 2) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7998,15 +7275,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8015,6 +7316,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -8022,18 +7328,54 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8061,10 +7403,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8083,7 +7431,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8092,6 +7447,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8099,22 +7459,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8123,18 +7518,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: 
COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8176,10 +7606,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8198,7 +7634,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8207,6 +7650,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -8214,22 +7662,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8238,18 +7721,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8276,10 +7796,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8298,7 +7824,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8309,6 +7842,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -8317,43 +7855,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8362,18 +7964,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8412,10 +8049,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8434,7 +8077,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8443,6 +8093,11 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -8450,22 +8105,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + 
includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -8474,18 +8164,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8522,10 +8247,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi 
join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8544,7 +8275,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8555,6 +8293,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8563,22 +8306,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8587,22 +8365,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8611,18 +8423,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8661,10 +8508,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8683,6 +8536,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -8691,6 +8547,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -8699,31 +8560,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8732,18 +8644,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8794,10 +8741,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8816,35 +8769,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8853,9 +8859,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -8875,13 +8898,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -8935,10 +8976,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL 
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8957,23 +9004,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8982,21 +9062,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -9016,13 +9133,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9076,10 +9211,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9098,23 +9239,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9123,21 +9297,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -9157,13 +9368,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9219,10 +9448,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9241,23 +9476,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9266,21 +9534,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + 
dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -9300,13 +9605,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9373,10 +9696,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9395,7 +9724,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9404,6 +9740,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9414,6 +9755,11 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 value (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then 
requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 4 @@ -9421,22 +9767,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9445,30 +9826,85 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output 
Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9528,10 +9964,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9549,12 +9991,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -9549,12 +9991,23 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                    predicate: ((key > 100) and value is not null) (type: boolean)
                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -9562,12 +10015,20 @@ STAGE PLANS:
                        keys:
                          0 _col1 (type: string)
                          1 _col0 (type: string)
+                       Map Join Vectorization:
+                           className: VectorMapJoinOperator
+                           native: false
+                           nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                           nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
                        outputColumnNames: _col0
                        input vertices:
                          1 Map 2
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
+                         File Sink Vectorization:
+                             className: VectorFileSinkOperator
+                             native: false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -9575,19 +10036,49 @@ STAGE PLANS:
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
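The rowBatchContext entries record the batch layout for a vertex: dataColumns gives one typed vector per table column, while includeColumns lists the only columns the scan actually populates (for example [1] when just value is read). A minimal sketch of that layout using Hive's public column-vector classes; the helper class itself is hypothetical.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RowBatchContextSketch {

  // dataColumnCount: 2, dataColumns: key:int, value:string
  public static VectorizedRowBatch createBatch(int[] includeColumns) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();   // key:int    -> long vector
    batch.cols[1] = new BytesColumnVector();  // value:string -> bytes vector
    for (int c : includeColumns) {
      // A real reader decodes only these columns from the input split;
      // the rest stay untouched, which is the point of the projection.
      batch.cols[c].init();
    }
    return batch;
  }

  public static void main(String[] args) {
    // includeColumns: [1] as in the plan above: only "value" is read.
    VectorizedRowBatch batch = createBatch(new int[] {1});
    System.out.println("columns allocated: " + batch.numCols);
  }
}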
        Map 2
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                    predicate: value is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: value (type: string)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [1]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 1
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: string)
                        mode: hash
                        outputColumnNames: _col0
@@ -9596,9 +10087,26 @@
                          key expressions: _col0 (type: string)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: string)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkStringOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0

  Stage: Stage-0
    Fetch Operator
@@ -9616,10 +10124,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t2
POSTHOOK: Input: default@t3
#### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -9638,7 +10152,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -9647,6 +10168,14 @@ STAGE PLANS:
                        keys:
                          0 key (type: int)
                          1 _col0 (type: int)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [0]
+                           bigTableRetainedColumns: [0, 1]
+                           bigTableValueColumns: [0, 1]
+                           className: VectorMapJoinLeftSemiLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                           projectedOutputColumns: [0, 1]
                        outputColumnNames: _col0, _col1
                        input vertices:
                          1 Map 3
@@ -9654,22 +10183,57 @@
                          Reduce Output Operator
                            key expressions: _col0 (type: int), _col1 (type: string)
                            sort order: ++
+                           Reduce Sink Vectorization:
+                               className: VectorReduceSinkOperator
+                               native: false
+                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                               nativeConditionsNotMet: Uniform Hash IS false
                            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
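The VectorMapJoinLeftSemiLongOperator above implements left semi join semantics: the small-table side is first deduplicated to distinct keys (hence the Group By Operator on the small-table side of each of these plans), and each big-table row is emitted at most once on a key match, with no small-table columns projected. A minimal sketch of those semantics, with plain Java collections standing in for Hive's optimized long hash set:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class LeftSemiJoinSketch {

  // Emits each big-table row at most once when its key is present in the
  // distinct small-table key set; only big-table columns are retained,
  // mirroring bigTableRetainedColumns in the plan above.
  public static List<long[]> leftSemiJoin(List<long[]> bigTableRows,
                                          Set<Long> smallTableKeys) {
    List<long[]> result = new ArrayList<>();
    for (long[] row : bigTableRows) {
      // row[0] plays the role of bigTableKeyColumns: [0]
      if (smallTableKeys.contains(row[0])) {
        result.add(row);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    Set<Long> keys = new HashSet<>();
    keys.add(4L);
    keys.add(8L);
    List<long[]> big = new ArrayList<>();
    big.add(new long[] {4, 40});
    big.add(new long[] {5, 50});
    System.out.println("matched rows: " + leftSemiJoin(big, keys).size()); // 1
  }
}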
        Map 3
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -9678,18 +10242,53 @@
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -9718,10 +10317,16 @@ POSTHOOK: Input: default@t2
10	val_10
4	val_4
8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9740,7 +10345,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9749,6 +10361,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9756,22 +10376,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9780,18 +10435,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true 
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9822,10 +10512,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9844,7 +10540,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9853,6 +10556,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -9860,22 +10571,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -9884,18 +10630,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9918,10 +10699,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9940,7 +10727,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -9949,6 +10743,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -9956,26 +10758,65 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 0 + native: false + projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -9984,18 +10825,53 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 
1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10029,10 +10905,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10051,7 +10933,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10060,6 +10949,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10067,22 +10964,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -10091,18 +11023,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10128,10 +11095,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by 
a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10150,14 +11123,31 @@ STAGE PLANS: TableScan alias: t3 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean predicate: (key > 5) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10166,15 +11156,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10183,6 +11197,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types 
IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -10190,22 +11212,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10231,10 +11293,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ 
-10253,14 +11321,31 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -10269,15 +11354,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10286,6 +11395,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [1] + bigTableValueColumns: [1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -10293,22 +11410,62 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10331,10 +11488,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10353,14 +11516,31 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (key > 2) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10369,15 +11549,39 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10386,6 +11590,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -10393,18 +11605,54 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10432,10 +11680,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10454,7 +11708,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10463,6 +11724,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -10470,22 +11739,57 @@ STAGE PLANS: 
Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10494,18 +11798,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: 
KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10547,10 +11886,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10569,7 +11914,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10578,6 +11930,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10585,22 +11945,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + 
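[Editorial note: each enabledConditionsMet / nativeConditionsMet entry names a configuration check the vectorizer performed. For reference, a minimal session-level sketch of the switches these plans assert, with everything else left at defaults:

SET hive.vectorized.execution.enabled=true;                 -- PLAN VECTORIZATION
SET hive.vectorized.execution.reduce.enabled=true;          -- Reduce Vectorization
SET hive.vectorized.execution.mapjoin.native.enabled=true;  -- native map joins
SET hive.vectorized.execution.reducesink.new.enabled=true;  -- native reduce sinks
SET hive.execution.engine=tez;                              -- engine IN [tez, spark]]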
partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10609,18 +12004,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10647,10 +12079,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail 
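[Editorial note: when the join key is an expression, as in a.key = 2*b.key above, the build side evaluates it with LongScalarMultiplyLongColumn into a scratch column — that is where the scratchColumnTypeNames: bigint entry comes from. An illustrative reformulation that makes the computed key explicit (dkey is a hypothetical alias):

-- Illustrative: materializing 2*key as a column shows the value the
-- vectorizer otherwise computes into a bigint scratch column.
SELECT a.*
FROM t1 a
JOIN (SELECT DISTINCT 2 * key AS dkey FROM t2) b ON a.key = b.dkey
SORT BY a.key, a.value;]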
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10669,7 +12107,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10680,6 +12125,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -10688,43 +12138,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10733,18 +12247,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
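[Editorial note: earlier in this plan the inner join and the semi join were merged into a single three-way Map Join Operator, so the merged operator falls back to the row-mode VectorMapJoinOperator (nativeConditionsNotMet: One MapJoin Condition IS false). Semantically, the semi leg is just an existence test; an illustrative EXISTS form returning the same rows (recent Hive releases accept correlated EXISTS):

-- Illustrative: the LEFT SEMI JOIN leg of the merged join acts as an
-- existence filter on b.key.
SELECT a.key, a.value, b.key, b.value
FROM t1 a
JOIN t2 b ON a.key = b.key
WHERE EXISTS (SELECT 1 FROM t3 c WHERE c.key = b.key)
SORT BY a.key, a.value;]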
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10783,10 +12332,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10805,7 +12360,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10814,6 +12376,14 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + bigTableKeyColumns: [0, 1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10821,22 +12391,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -10845,18 +12450,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 
(type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -10893,10 +12533,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10915,7 +12561,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10926,6 +12579,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -10934,22 +12592,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
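[Editorial note: the vectorized join and sink classes are specialized by key type — a single int key selects VectorMapJoinLeftSemiLongOperator and VectorReduceSinkLongOperator, while the composite (int, string) key above selects the MultiKey variants. A sketch of how to inspect the chosen class without the surrounding plan text:

-- Sketch: the ONLY modifier prints just the vectorization annotations,
-- so the operator class (Long vs MultiKey) stands out.
EXPLAIN VECTORIZATION ONLY DETAIL
SELECT * FROM t3 a
LEFT SEMI JOIN t1 b ON a.key = b.key AND a.value = b.value
SORT BY a.key, a.value;]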
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10958,22 +12651,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10982,18 +12709,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets 
IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11032,10 +12794,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11054,6 +12822,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -11062,6 +12833,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -11070,31 +12846,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11103,18 +12930,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11165,10 +13027,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11187,35 +13055,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 
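[Editorial note: a FULL OUTER JOIN cannot run as a map join, since both sides may produce unmatched rows, so in this plan the join executes as a shuffle join in Reducer 2, which stays non-vectorized (plain Execution mode: llap); only the final sort in Reducer 3 is vectorized. The NULL a.key rows in the result are t3 keys that matched t2 but not t1. An illustrative IN form of the same query:

-- Illustrative: the semi join after the FULL OUTER JOIN keeps only rows
-- whose b.key matches t2, which drops unmatched t1 rows (b.key is NULL)
-- but keeps unmatched t3 rows as NULL a.key.
SELECT a.key
FROM t1 a
FULL OUTER JOIN t3 b ON a.key = b.key
WHERE b.key IN (SELECT key FROM t2)
SORT BY a.key;]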
Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11224,9 +13145,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11246,13 +13184,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + 
dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11306,10 +13262,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11328,23 +13290,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11353,21 +13348,58 @@ STAGE PLANS: key expressions: _col0 (type: 
int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -11387,13 +13419,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -11447,10 +13497,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b 
on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11469,23 +13525,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11494,21 +13583,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
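[Editorial note: throughout these plans, the final SORT BY sink is the one operator that keeps falling back to the row-mode VectorReduceSinkOperator with nativeConditionsNotMet: Uniform Hash IS false — a bare SORT BY has no Map-reduce partition columns, so the sink cannot hash-partition on a key the way the specialized Long/MultiKey sinks require. A hypothetical variant that supplies a partition key (whether the native sink is then chosen still depends on the rest of the plan):

-- Hypothetical: DISTRIBUTE BY adds the partition key that the
-- "Uniform Hash" precondition looks for.
SELECT a.key
FROM t3 a
LEFT SEMI JOIN t2 b ON a.key = b.key
RIGHT OUTER JOIN t1 c ON a.key = c.key
DISTRIBUTE BY a.key SORT BY a.key;]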
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:int, value:string
+                    partitionColumnCount: 0
        Map 5 
            Map Operator Tree:
                TableScan
                  alias: c
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Reduce Output Operator
                    key expressions: key (type: int)
                    sort order: +
                    Map-reduce partition columns: key (type: int)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkLongOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: llap
            Reduce Operator Tree:
@@ -11528,13 +13654,31 @@ STAGE PLANS:
                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
        Reducer 3 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:int
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11590,10 +13734,16 @@ NULL
NULL
NULL
NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -11612,23 +13762,56 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Reduce Output Operator
                    key expressions: key (type: int)
                    sort order: +
                    Map-reduce partition columns: key (type: int)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkLongOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
+                   Select Vectorization:
+                       className: VectorSelectOperator
+                       native: true
+                       projectedOutputColumns: [0]
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
+                     Group By Vectorization:
+                         className: VectorGroupByOperator
+                         vectorOutput: true
+                         keyExpressions: col 0
+                         native: false
+                         projectedOutputColumns: []
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0
@@ -11637,21 +13820,58 @@ STAGE PLANS:
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkLongOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 5 
            Map Operator Tree:
                TableScan
                  alias: c
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Reduce Output Operator
                    key expressions: key (type: int)
                    sort order: +
                    Map-reduce partition columns: key (type: int)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkLongOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: llap
            Reduce Operator Tree:
@@ -11671,13 +13891,31 @@ STAGE PLANS:
                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
        Reducer 3 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:int
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11744,10 +13982,16 @@ POSTHOOK: Input: default@t3
NULL
NULL
NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -11766,7 +14010,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -11775,6 +14026,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -11785,6 +14044,14 @@ STAGE PLANS:
                      keys:
                        0 _col1 (type: string)
                        1 value (type: string)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [1]
+                         bigTableRetainedColumns: [0]
+                         bigTableValueColumns: [0]
+                         className: VectorMapJoinOuterStringOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0]
                      outputColumnNames: _col0
                      input vertices:
                        1 Map 4
@@ -11792,22 +14059,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -11816,30 +14118,85 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: c
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Reduce Output Operator
                    key expressions: value (type: string)
                    sort order: +
                    Map-reduce partition columns: value (type: string)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkStringOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: true
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:int
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11899,10 +14256,16 @@ POSTHOOK: Input: default@t3
4
8
8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -11920,12 +14283,23 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                    predicate: ((key > 100) and value is not null) (type: boolean)
                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -11933,12 +14307,23 @@ STAGE PLANS:
                        keys:
                          0 _col1 (type: string)
                          1 _col0 (type: string)
+                       Map Join Vectorization:
+                           bigTableKeyColumns: [1]
+                           bigTableRetainedColumns: [0]
+                           bigTableValueColumns: [0]
+                           className: VectorMapJoinLeftSemiStringOperator
+                           native: true
+                           nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                           projectedOutputColumns: [0]
                        outputColumnNames: _col0
                        input vertices:
                          1 Map 2
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
+                         File Sink Vectorization:
+                             className: VectorFileSinkOperator
+                             native: false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -11946,19 +14331,49 @@ STAGE PLANS:
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                    predicate: value is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: value (type: string)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [1]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 1
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: string)
                        mode: hash
                        outputColumnNames: _col0
@@ -11967,9 +14382,26 @@ STAGE PLANS:
                          key expressions: _col0 (type: string)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: string)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkStringOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
  Stage: Stage-0
    Fetch Operator
@@ -11987,10 +14419,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t2
POSTHOOK: Input: default@t3
#### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12009,7 +14447,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12018,6 +14463,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12025,22 +14478,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12049,18 +14537,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12089,10 +14612,16 @@ POSTHOOK: Input: default@t2
10	val_10
4	val_4
8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12111,7 +14640,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12120,6 +14656,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12127,22 +14671,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12151,18 +14730,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12193,10 +14807,16 @@ POSTHOOK: Input: default@t2
10	val_5
4	val_2
8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12215,7 +14835,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12224,6 +14851,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12231,22 +14866,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12255,18 +14925,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12289,10 +14994,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t4
#### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12311,7 +15022,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12320,6 +15038,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col1 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [1]
+                         bigTableValueColumns: [1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [1]
                      outputColumnNames: _col1
                      input vertices:
                        1 Map 3
@@ -12327,26 +15053,65 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                       Select Vectorization:
+                           className: VectorSelectOperator
+                           native: true
+                           projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkOperator
+                             native: false
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColLessLongScalar(col 0, val 15) -> boolean
                    predicate: (key < 15) (type: boolean)
                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0, col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col1 (type: int), _col1 (type: int)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -12355,18 +15120,53 @@ STAGE PLANS:
                          key expressions: _col1 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col1 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12400,10 +15200,16 @@ val_5
val_5
val_8
val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12422,7 +15228,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12431,6 +15244,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 3
@@ -12438,22 +15259,57 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1, val val_10) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0, col 1
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -12462,18 +15318,53 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12499,10 +15390,16 @@ POSTHOOK: Input: default@t2
0	val_0
0	val_0
0	val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12521,14 +15418,31 @@ STAGE PLANS:
                TableScan
                  alias: t3
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean
                    predicate: (key > 5) (type: boolean)
                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12537,15 +15451,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12554,6 +15492,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [1]
+                         bigTableValueColumns: [1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [1]
                      outputColumnNames: _col1
                      input vertices:
                        1 Map 1
@@ -12561,22 +15507,62 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                       Select Vectorization:
+                           className: VectorSelectOperator
+                           native: true
+                           projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkOperator
+                             native: false
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12602,10 +15588,16 @@ POSTHOOK: Input: default@t3
val_10
val_8
val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12624,14 +15616,31 @@ STAGE PLANS:
                TableScan
                  alias: t2
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 5) -> boolean, FilterStringGroupColLessEqualStringScalar(col 1, val val_20) -> boolean) -> boolean
                    predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0, 1]
                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0, col 1
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -12640,15 +15649,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12657,6 +15690,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [1]
+                         bigTableValueColumns: [1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [1]
                      outputColumnNames: _col1
                      input vertices:
                        1 Map 1
@@ -12664,22 +15705,62 @@ STAGE PLANS:
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
+                       Select Vectorization:
+                           className: VectorSelectOperator
+                           native: true
+                           projectedOutputColumns: [1]
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkOperator
+                             native: false
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsNotMet: Uniform Hash IS false
                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12702,10 +15783,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12724,14 +15811,31 @@ STAGE PLANS:
                TableScan
                  alias: t1
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (key > 2) (type: boolean)
                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [0]
                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
+                       Group By Vectorization:
+                           className: VectorGroupByOperator
+                           vectorOutput: true
+                           keyExpressions: col 0
+                           native: false
+                           projectedOutputColumns: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -12740,15 +15844,39 @@ STAGE PLANS:
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkLongOperator
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -12757,6 +15885,14 @@ STAGE PLANS:
                      keys:
                        0 key (type: int)
                        1 _col0 (type: int)
+                     Map Join Vectorization:
+                         bigTableKeyColumns: [0]
+                         bigTableRetainedColumns: [0, 1]
+                         bigTableValueColumns: [0, 1]
+                         className: VectorMapJoinLeftSemiLongOperator
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                         projectedOutputColumns: [0, 1]
                      outputColumnNames: _col0, _col1
                      input vertices:
                        1 Map 1
@@ -12764,18 +15900,54 @@ STAGE PLANS:
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
+                       Reduce Sink Vectorization:
+                           className: VectorReduceSinkOperator
+                           native: false
+                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+           Map Vectorization:
+               enabled: true
+               enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+               groupByVectorOutput: true
+               inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   includeColumns: [0, 1]
+                   dataColumns: key:int, value:string
+                   partitionColumnCount: 0
        Reducer 3 
            Execution mode: vectorized, llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 2
+                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -12803,10 +15975,16 @@ POSTHOOK: Input: default@t2
10	val_5
4	val_2
8	val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -12825,7 +16003,14 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+                 TableScan
Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12834,6 +16019,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -12841,22 +16034,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12865,18 +16093,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -12918,10 +16181,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12940,7 +16209,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12949,6 +16225,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -12956,22 
+16240,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2)(children: LongScalarMultiplyLongColumn(val 2, col 0) -> 2:long) -> boolean predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12980,18 +16299,55 @@ STAGE PLANS: key expressions: (2 * _col0) (type: int) sort order: + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13018,10 +16374,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13040,7 +16402,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13051,6 +16420,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 @@ -13059,43 +16433,107 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13104,18 +16542,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13154,10 +16627,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13176,7 +16655,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ 
-13185,6 +16671,14 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + bigTableKeyColumns: [0, 1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -13192,22 +16686,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -13216,18 +16745,53 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe 
for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13264,10 +16828,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13286,7 +16856,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13297,6 +16874,11 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes 
IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -13305,22 +16887,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13329,22 +16946,56 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13353,18 +17004,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13403,10 +17089,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13425,6 +17117,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -13433,6 +17128,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0 input vertices: 1 Map 3 @@ -13441,31 +17141,82 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13474,18 +17225,53 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13536,10 +17322,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13558,35 +17350,88 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output 
Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13595,9 +17440,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -13617,13 +17479,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13677,10 +17557,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13699,23 +17585,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13724,21 +17643,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -13758,13 +17714,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE 
Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13818,10 +17792,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13840,23 +17820,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13865,21 +17878,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -13899,13 +17949,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -13961,10 +18029,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13983,23 +18057,56 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -14008,21 +18115,58 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -14042,13 +18186,31 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -14115,10 +18277,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: 
explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -14137,7 +18305,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -14146,6 +18321,14 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -14156,6 +18339,14 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 value (type: string) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 4 @@ -14163,22 +18354,57 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + 
className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -14187,30 +18413,85 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: 
KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -14270,10 +18551,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -14291,12 +18578,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -14304,12 +18602,23 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -14317,19 +18626,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -14338,9 +18677,26 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index a075662..1fde0a9 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,6 +69,10 @@ STAGE PLANS: value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -89,6 +97,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -109,10 +121,27 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -123,6 +152,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col2, _col4 input vertices: 0 Map 1 @@ -130,9 +163,16 @@ STAGE PLANS: Select Operator expressions: _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -170,19 +210,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p 
join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -226,6 +270,10 @@ STAGE PLANS: value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -250,6 +298,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -270,10 +322,27 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -284,6 +353,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col2, _col4 input vertices: 0 Map 1 @@ -291,9 +364,16 @@ STAGE PLANS: Select Operator expressions: _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index b6a3b9a..d7ebd2b 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -211,7 +211,7 @@ POSTHOOK: Output: default@store PREHOOK: 
query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -226,7 +226,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -238,6 +238,10 @@ explain select order by s_state limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -273,6 +277,12 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -293,6 +303,14 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -312,6 +330,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -359,6 +385,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -374,6 +407,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out index 469c702..d537297 100644 --- ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out @@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc1 POSTHOOK: Output: default@orc1 -PREHOOK: query: explain from orc1 a +PREHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 
100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 PREHOOK: type: QUERY -POSTHOOK: query: explain from orc1 a +POSTHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -142,6 +146,14 @@ STAGE PLANS: name: default.orc_rn3 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-4 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out index 4bfe41a..45520e2 100644 --- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out @@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@b POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. -explain +explain vectorization expression select NULL from a PREHOOK: type: QUERY POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. 
-explain +explain vectorization expression select NULL from a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,6 +75,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -87,12 +97,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@a #### A masked pattern was here #### NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,6 +142,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + vectorized: false Map 4 Map Operator Tree: TableScan @@ -149,8 +169,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + vectorized: false Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: void) diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 06e30d8..cf90430 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: 
-- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,12 +75,20 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: @@ -85,9 +97,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -95,19 +114,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -134,10 +176,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL 
NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -155,7 +201,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -166,6 +219,11 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 @@ -174,9 +232,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -184,38 +249,84 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce 
partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -233,10 +344,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,6 +369,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -262,6 +380,11 @@ STAGE PLANS: 
0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -271,9 +394,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -281,32 +411,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -351,10 +519,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -372,7 +544,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -383,6 +562,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -392,9 +576,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -402,36 +593,82 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -449,10 +686,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND 
a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -470,6 +711,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -478,6 +722,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -487,9 +736,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -497,30 +753,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform 
Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -644,11 +938,15 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -666,12 +964,20 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: @@ -680,9 +986,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num 
rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -690,19 +1003,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -729,10 +1065,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -750,7 +1090,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -761,6 +1108,11 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 @@ -769,9 +1121,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -779,38 +1138,84 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -828,10 +1233,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -849,6 +1258,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -857,6 +1269,11 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -866,9 +1283,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -876,32 +1300,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -946,10 +1408,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -967,7 +1433,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -978,6 +1451,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -987,9 +1465,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -997,36 +1482,82 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1044,10 +1575,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1065,6 +1600,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -1073,6 +1611,11 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, 
When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -1082,9 +1625,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1092,30 +1642,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 6479ec5..c4bbeff 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -103,7 +103,7 @@ scratch.t scratch.si scratch.i scratch.b scratch.f scratch.d scratch.dc PREHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q @@ -111,12 +111,16 @@ PREHOOK: type: QUERY POSTHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -134,19 +138,45 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 7, 8, 9] + selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
                notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFHash(Column[KEY.reducesinkkey0], Column[KEY.reducesinkkey1], Column[KEY.reducesinkkey2], Column[VALUE._col0], Column[VALUE._col1], Column[VALUE._col2]) not supported
+                vectorized: false
             Reduce Operator Tree:
               Select Operator
                 expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int)
@@ -185,17 +215,21 @@ POSTHOOK: Input: default@vectortab2k_orc
 #### A masked pattern was here ####
 c0
 -3601806268
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
     (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*)) FROM
     (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -213,19 +247,45 @@ STAGE PLANS:
                 TableScan
                   alias: vectortab2k_orc
                   Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
                   Select Operator
                     expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10]
+                        selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long
                     Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint)
                       sort order: ++++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+
notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFHash(Column[KEY.reducesinkkey0], Column[KEY.reducesinkkey1], Column[KEY.reducesinkkey2], Column[KEY.reducesinkkey3], Column[VALUE._col0], Column[VALUE._col1], Column[VALUE._col2], Column[VALUE._col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out ql/src/test/results/clientpositive/llap/vector_nvl.q.out index b926ab4b..aa8ed4a 100644 --- ql/src/test/results/clientpositive/llap/vector_nvl.q.out +++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out @@ -1,31 +1,82 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + predicate: cdouble is null (type: boolean) + Statistics: Num rows: 3114 Data size: 18608 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: double), 100.0 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double + Statistics: Num rows: 3114 Data size: 24920 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - 
Filter Operator - predicate: cdouble is null (type: boolean) - Select Operator - expressions: null (type: double), 100.0 (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc @@ -51,30 +102,76 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: cfloat (type: float), NVL(cfloat,1) (type: float) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 13] + selectExpressions: VectorCoalesce(columns [4, 12])(children: col 4, ConstantVectorExpression(val 1.0) -> 12:double) -> 13:float + Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: cfloat (type: float), NVL(cfloat,1) (type: float) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc @@ -98,30 +195,76 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 10 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 10) -> 12:long + Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 10 (type: int) - outputColumnNames: _col0 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT nvl(null, 10) as n FROM alltypesorc @@ -145,30 +288,60 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT nvl(null, null) as n FROM alltypesorc diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 503cf5b..2073b22 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,12 +129,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -139,15 +157,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -155,17 +199,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 6edcbeb..9301a4e 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,9 +89,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -95,12 +106,25 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableOuterKeyMapping: 1 -> 2 + bigTableRetainedColumns: [0, 1, 2] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2, 3] + smallTableMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -108,23 +132,61 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, String Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -152,12 +214,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -175,26 +241,57 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -202,12 +299,25 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableOuterKeyMapping: 0 -> 3 + bigTableRetainedColumns: [0, 1, 3] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + 
projectedOutputColumns: [2, 3, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -215,6 +325,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String, bigint Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index cda039f..f3af684 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -245,9 +249,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -255,12 +266,25 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col2 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableOuterKeyMapping: 2 -> 14 + 
bigTableRetainedColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14] + bigTableValueColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + smallTableMapping: [12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -268,23 +292,61 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, Double, Double, String, String, Timestamp, Timestamp, bigint, bigint Map 2 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -329,18 +391,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -358,9 +424,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -368,12 +441,23 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key 
Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -381,22 +465,59 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 2 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Stage: Stage-0 Fetch Operator @@ -527,7 +648,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -536,7 +657,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail 
select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -545,6 +666,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -563,9 +688,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -573,6 +705,14 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -583,63 +723,163 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 240 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index 051911b..a1b14ce 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -262,9 +266,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int), 
cbigint (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -272,6 +283,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [2] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -282,63 +301,163 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: [3] + bigTableRetainedColumns: [3] + bigTableValueColumns: [3] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [3] outputColumnNames: _col1 input vertices: 1 Map 4 Statistics: Num rows: 162 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: cd Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: hd Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index 5729237..fbd294e 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,112 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 40 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 80 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -375,7 +270,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -384,7 +279,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -393,112 +288,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - 
Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 21 Data size: 1869 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -524,7 +314,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -533,7 +323,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -542,112 +332,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = 
c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index 75d783f..b9b97f6 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -246,82 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - input vertices: - 1 Map 2 - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: 
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col2 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col2 (type: int)
-                      Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select * 
@@ -394,81 +331,19 @@ NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL
 NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
 NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
 NULL	NULL	-899422227	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
 NULL	NULL	-899422227	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
 NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
 NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select c.ctinyint 
 from small_alltypesorc_b c 
 left outer join small_alltypesorc_b hd 
   on hd.ctinyint = c.ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select c.ctinyint 
 from small_alltypesorc_b c 
 left outer join small_alltypesorc_b hd 
   on hd.ctinyint = c.ctinyint
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select c.ctinyint 
@@ -897,7 +772,7 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
@@ -906,7 +781,7 @@ left outer join small_alltypesorc_b hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
@@ -915,112 +790,7 @@ left outer join small_alltypesorc_b hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
 #### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 81 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: tinyint)
-                          1 _col0 (type: tinyint)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 1215 Data size: 9720 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.ctinyint
diff --git ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
index 8a18738..eb61044 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
@@ -66,100 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
 #### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: st
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -179,103 +100,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
 #### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -295,103 +134,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6058
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -411,103 +168,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6248
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
 #### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col0 < 100)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -527,7 +202,7 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -536,7 +211,7 @@ left outer join sorted_mod_4 s2
 on s2.ctinyint = s.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -545,119 +220,7 @@ left outer join sorted_mod_4 s2
 on s2.ctinyint = s.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 UDFToLong(_col1) (type: bigint)
-                        1 (_col0 pmod 8) (type: bigint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 4
-                      Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cbigint (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: (_col0 pmod 8) (type: bigint)
-                      sort order: +
-                      Map-reduce partition columns: (_col0 pmod 8) (type: bigint)
-                      Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: s2
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: tinyint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: tinyint)
-                      Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                keys:
-                  0 _col0 (type: tinyint)
-                  1 _col0 (type: tinyint)
-                Statistics: Num rows: 9760469 Data size: 78083752 Basic stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -743,100 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table2
 #### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
 on s.cmodtinyint = st.cmodtinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
 on s.cmodtinyint = st.cmodtinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: st
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
@@ -856,103 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4
 POSTHOOK: Input: default@small_table2
 #### A masked pattern was here ####
 39112
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
 #### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1 
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -972,103 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4
 POSTHOOK: Input: default@small_table2
 #### A masked pattern was here ####
 11171
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {((_col0 pmod 4) = _col1)}
-                        1 
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -1088,103 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4
 POSTHOOK: Input: default@small_table2
 #### A masked pattern was here ####
 14371
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
 #### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col0 < 3)}
-                        1 
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -1204,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4
 POSTHOOK: Input: default@small_table2
 #### A masked pattern was here ####
 17792
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -1213,7 +451,7 @@ left outer join mod_8_mod_4 s2
 on s2.cmodtinyint = s.cmodtinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -1222,119 +460,7 @@ left outer join mod_8_mod_4 s2
 on s2.cmodtinyint = s.cmodtinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 UDFToLong(_col1) (type: bigint)
-                        1 (_col0 pmod 8) (type: bigint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 4
-                      Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cbigint (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: (_col0 pmod 8) (type: bigint)
-                      sort order: +
-                      Map-reduce partition columns: (_col0 pmod 8) (type: bigint)
-                      Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: s2
-                  Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 152914016 Data size: 1223312128 Basic stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
diff --git ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
index 9369661..9a95606 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
@@ -126,113 +126,15 @@ POSTHOOK: Output: default@TJOIN4
 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin1
-                  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: rnum (type: int), c1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col1 (type: int)
-                      outputColumnNames: _col0, _col2, _col3
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
-                        Map Join Operator
-                          condition map:
-                               Left Outer Join0 to 1
-                          keys:
-                            0 _col2 (type: int)
-                            1 _col1 (type: int)
-                          outputColumnNames: _col0, _col1, _col3
-                          input vertices:
-                            1 Map 3
-                          Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE
-                          Select Operator
-                            expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int)
-                            outputColumnNames: _col0, _col1, _col2
-                            Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE
-                            File Output Operator
-                              compressed: false
-                              Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE
-                              table:
-                                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin2
-                  Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: rnum (type: int), c1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin3
-                  Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: rnum (type: int), c1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
@@ -251,108 +153,15 @@ POSTHOOK: Input: default@tjoin3
 0	3	0
 1	NULL	NULL
 2	NULL	NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin1
-                  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: rnum (type: int), c1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col1 (type: int)
-                      outputColumnNames: _col0, _col2, _col3
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE
-                        Map Join Operator
-                          condition map:
-                               Left Outer Join0 to 1
-                          keys:
-                            0 _col2 (type: int)
-                            1 _col0 (type: int)
-                          outputColumnNames: _col0, _col1
-                          input vertices:
-                            1 Map 3
-                          Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE
-                          File Output Operator
-                            compressed: false
-                            Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE
-                            table:
-                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin2
-                  Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: rnum (type: int), c1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: tjoin3
-                  Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: c1 (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select tj1rnum, tj2rnum as rnumt3
 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj
 left outer join tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
index ebe895f..38c55d6 100644
--- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
@@ -76,12 +76,16 @@ POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(invent
 POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ]
 POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ]
 POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -99,31 +103,73 @@ STAGE PLANS:
                 TableScan
                   alias: inventory_part_0
                   Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
                   Select Operator
                     expressions: inv_quantity_on_hand (type: int)
                     outputColumnNames: inv_quantity_on_hand
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(inv_quantity_on_hand)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -204,12 +250,16 @@ POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(in
 POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ]
 POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ]
 POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -227,31 +277,73 @@ STAGE PLANS:
                 TableScan
                   alias: inventory_part_1
                   Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                   Select Operator
                     expressions: inv_quantity_on_hand (type: int)
                     outputColumnNames: inv_quantity_on_hand
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(inv_quantity_on_hand)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -334,12 +426,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2a POSTHOOK: Input: default@inventory_part_2a@par=2 POSTHOOK: Output: default@inventory_part_2a@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -357,31 +453,73 @@ STAGE PLANS: TableScan alias: inventory_part_2a Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -445,12 +583,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2b POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -468,31 +610,73 @@ STAGE PLANS: TableScan alias: inventory_part_2b Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -562,12 +746,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_3 POSTHOOK: Input: default@inventory_part_3@par=2 POSTHOOK: Output: default@inventory_part_3@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -585,31 +773,73 @@ STAGE PLANS: TableScan alias: inventory_part_3 Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 9eeb0d6..26fa9d9 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -280,46 +284,102 @@ STAGE PLANS: TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 4, 5, 0] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 4, 5, 0] Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -365,12 +425,16 @@ Chicago New York 2010-10-24 2010-10-24 07:00:00 113.0 897 Chicago New York 2010-10-25 2010-10-25 07:00:00 -1.0 897 Chicago New York 2010-10-26 2010-10-26 07:00:00 0.0 897 Chicago New York 2010-10-27 2010-10-27 07:00:00 -11.0 897 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date POSTHOOK: type: QUERY 
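These golden-file hunks all make the same substitution: the old plain `explain` is replaced by the `explain vectorization expression` syntax this patch introduces, and the expected output gains the PLAN VECTORIZATION header plus per-operator Vectorization detail. A minimal sketch of reproducing a plan like the one below by hand, assuming the `flights_tiny_orc` test table defined earlier in this q-file and the two settings named in the expected output's condition lists:

    -- enable map-side and reduce-side vectorization, as the expected
    -- output's enabledConditionsMet lines require
    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.reduce.enabled=true;

    -- the new explain mode added by this patch; "vectorization" prints the
    -- plan-level summary and operator blocks, "expression" adds the
    -- vector expression detail (e.g. VectorUDAFCountStar(*) -> bigint)
    explain vectorization expression
    select fl_date, count(*) from flights_tiny_orc group by fl_date;

The PLAN VECTORIZATION block reports whether vectorization is enabled at all, the Map Vectorization and Reduce Vectorization blocks summarize each vertex, and the per-operator blocks (TableScan Vectorization, Select Vectorization, Group By Vectorization, and so on) name the vector operator class chosen and whether it runs natively.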
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -388,12 +452,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -402,21 +480,50 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -590,17 +697,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-29 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-30 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 #### A masked pattern was here #### -Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 -Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 
-Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 -Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 -Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 +Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 +Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 +Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 +Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 +Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 +Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 Baltimore New York 2010-10-21 07:00:00 17.0 1064 2010-10-21 Baltimore New York 2010-10-21 07:00:00 105.0 1142 2010-10-21 Baltimore New York 2010-10-21 07:00:00 28.0 1599 2010-10-21 @@ -613,41 +722,6 @@ Cleveland New York 2010-10-21 07:00:00 3.0 2630 2010-10-21 Cleveland New York 2010-10-21 07:00:00 29.0 2646 2010-10-21 Cleveland New York 2010-10-21 07:00:00 72.0 3014 2010-10-21 Washington New York 2010-10-21 07:00:00 22.0 7291 2010-10-21 -Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 -Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 -Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 -Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 -Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 -Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 -Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 -Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 -Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 -Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 -Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 -Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 -Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 -Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 -Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 -Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 -Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 -Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -11.0 
2630 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 -Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 Baltimore New York 2010-10-25 07:00:00 -25.0 1064 2010-10-25 Baltimore New York 2010-10-25 07:00:00 92.0 1142 2010-10-25 Baltimore New York 2010-10-25 07:00:00 106.0 1599 2010-10-25 @@ -660,19 +734,42 @@ Cleveland New York 2010-10-25 07:00:00 -4.0 2630 2010-10-25 Cleveland New York 2010-10-25 07:00:00 81.0 2646 2010-10-25 Cleveland New York 2010-10-25 07:00:00 42.0 3014 2010-10-25 Washington New York 2010-10-25 07:00:00 9.0 7291 2010-10-25 -Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 -Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 -Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 -Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 -Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 -Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 -Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 +Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 +Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 +Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 +Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 +Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 +Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 +Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 +Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 +Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 +Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 +Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 +Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 +Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 +Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 +Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 +Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 +Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 +Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -12.0 361 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -11.0 897 2010-10-29 +Chicago New York 2010-10-29 07:00:00 15.0 1531 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -18.0 1610 2010-10-29 +Chicago New York 2010-10-29 07:00:00 -4.0 3198 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 +Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 
+Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 @@ -696,18 +793,6 @@ Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 Washington New York 2010-10-28 07:00:00 45.0 7291 2010-10-28 -Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 -Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 -Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -12.0 361 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -11.0 897 2010-10-29 -Chicago New York 2010-10-29 07:00:00 15.0 1531 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -18.0 1610 2010-10-29 -Chicago New York 2010-10-29 07:00:00 -4.0 3198 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 -Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 -Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 Baltimore New York 2010-10-30 07:00:00 14.0 272 2010-10-30 Baltimore New York 2010-10-30 07:00:00 -1.0 1805 2010-10-30 Baltimore New York 2010-10-30 07:00:00 5.0 3171 2010-10-30 @@ -719,6 +804,28 @@ Cleveland New York 2010-10-30 07:00:00 -23.0 2018 2010-10-30 Cleveland New York 2010-10-30 07:00:00 -12.0 2932 2010-10-30 Washington New York 2010-10-30 07:00:00 -27.0 5904 2010-10-30 Washington New York 2010-10-30 07:00:00 -16.0 5917 2010-10-30 +Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 +Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 +Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 +Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 +Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 +Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 +Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 +Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 Baltimore New York 2010-10-31 07:00:00 -1.0 1599 2010-10-31 Baltimore New York 2010-10-31 07:00:00 -14.0 2571 2010-10-31 Chicago New York 2010-10-31 07:00:00 -25.0 361 2010-10-31 @@ -828,26 +935,67 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from 
flights_tiny_orc_partitioned_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_date + Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_date - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY @@ -881,17 +1029,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-29 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-30 POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 #### A masked pattern was here #### -Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 -Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 -Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 -Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 -Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 -Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 -Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 -Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 +Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 +Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 +Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 
+Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 +Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 +Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 +Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 +Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 Baltimore New York 2010-10-21 07:00:00 17.0 1064 2010-10-21 Baltimore New York 2010-10-21 07:00:00 105.0 1142 2010-10-21 Baltimore New York 2010-10-21 07:00:00 28.0 1599 2010-10-21 @@ -904,41 +1054,6 @@ Cleveland New York 2010-10-21 07:00:00 3.0 2630 2010-10-21 Cleveland New York 2010-10-21 07:00:00 29.0 2646 2010-10-21 Cleveland New York 2010-10-21 07:00:00 72.0 3014 2010-10-21 Washington New York 2010-10-21 07:00:00 22.0 7291 2010-10-21 -Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 -Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 -Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 -Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 -Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 -Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 -Cleveland New York 2010-10-22 07:00:00 -3.0 3014 2010-10-22 -Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 -Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 -Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 -Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 -Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 -Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 -Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 -Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 -Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 -Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 -Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 -Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 -Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 -Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 -Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 -Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 -Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 -Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 Baltimore New York 2010-10-25 07:00:00 -25.0 1064 2010-10-25 Baltimore New York 2010-10-25 07:00:00 92.0 1142 2010-10-25 Baltimore New York 2010-10-25 07:00:00 106.0 1599 2010-10-25 @@ -951,42 +1066,30 @@ Cleveland New York 2010-10-25 07:00:00 -4.0 2630 2010-10-25 Cleveland New York 2010-10-25 07:00:00 81.0 2646 2010-10-25 Cleveland New York 2010-10-25 07:00:00 42.0 3014 2010-10-25 Washington New York 2010-10-25 07:00:00 9.0 7291 2010-10-25 -Baltimore New York 2010-10-26 07:00:00 -22.0 1064 2010-10-26 
-Baltimore New York 2010-10-26 07:00:00 123.0 1142 2010-10-26 -Baltimore New York 2010-10-26 07:00:00 90.0 1599 2010-10-26 -Chicago New York 2010-10-26 07:00:00 12.0 361 2010-10-26 -Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 -Chicago New York 2010-10-26 07:00:00 29.0 1531 2010-10-26 -Chicago New York 2010-10-26 07:00:00 -17.0 1610 2010-10-26 -Chicago New York 2010-10-26 07:00:00 6.0 3198 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 4.0 2630 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -27.0 2646 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 -11.0 2662 2010-10-26 -Cleveland New York 2010-10-26 07:00:00 13.0 3014 2010-10-26 -Washington New York 2010-10-26 07:00:00 4.0 7291 2010-10-26 -Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 -Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 -Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 -Chicago New York 2010-10-27 07:00:00 148.0 361 2010-10-27 -Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -Chicago New York 2010-10-27 07:00:00 70.0 1531 2010-10-27 -Chicago New York 2010-10-27 07:00:00 8.0 1610 2010-10-27 -Chicago New York 2010-10-27 07:00:00 21.0 3198 2010-10-27 -Cleveland New York 2010-10-27 07:00:00 16.0 2630 2010-10-27 -Cleveland New York 2010-10-27 07:00:00 27.0 3014 2010-10-27 -Washington New York 2010-10-27 07:00:00 26.0 7291 2010-10-27 -Baltimore New York 2010-10-28 07:00:00 -4.0 1064 2010-10-28 -Baltimore New York 2010-10-28 07:00:00 -14.0 1142 2010-10-28 -Baltimore New York 2010-10-28 07:00:00 -14.0 1599 2010-10-28 -Chicago New York 2010-10-28 07:00:00 2.0 361 2010-10-28 -Chicago New York 2010-10-28 07:00:00 2.0 897 2010-10-28 -Chicago New York 2010-10-28 07:00:00 -11.0 1531 2010-10-28 -Chicago New York 2010-10-28 07:00:00 3.0 1610 2010-10-28 -Chicago New York 2010-10-28 07:00:00 -18.0 3198 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 -Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 -Washington New York 2010-10-28 07:00:00 45.0 7291 2010-10-28 +Baltimore New York 2010-10-24 07:00:00 12.0 1599 2010-10-24 +Baltimore New York 2010-10-24 07:00:00 20.0 2571 2010-10-24 +Chicago New York 2010-10-24 07:00:00 10.0 361 2010-10-24 +Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -5.0 1531 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -17.0 1610 2010-10-24 +Chicago New York 2010-10-24 07:00:00 -3.0 3198 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 5.0 2254 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -11.0 2630 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -20.0 2646 2010-10-24 +Cleveland New York 2010-10-24 07:00:00 -9.0 3014 2010-10-24 +Washington New York 2010-10-24 07:00:00 -26.0 7282 2010-10-24 +Baltimore New York 2010-10-23 07:00:00 18.0 272 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 -10.0 1805 2010-10-23 +Baltimore New York 2010-10-23 07:00:00 6.0 3171 2010-10-23 +Chicago New York 2010-10-23 07:00:00 3.0 384 2010-10-23 +Chicago New York 2010-10-23 07:00:00 32.0 426 2010-10-23 +Chicago New York 2010-10-23 07:00:00 1.0 650 2010-10-23 +Chicago New York 2010-10-23 07:00:00 11.0 3085 2010-10-23 +Cleveland New York 2010-10-23 07:00:00 -21.0 2932 2010-10-23 +Washington New York 2010-10-23 07:00:00 -25.0 5832 2010-10-23 +Washington New York 2010-10-23 07:00:00 -21.0 5904 2010-10-23 +Washington New York 2010-10-23 07:00:00 -18.0 5917 2010-10-23 +Washington New York 2010-10-23 07:00:00 -16.0 7274 2010-10-23 
Baltimore New York 2010-10-29 07:00:00 -24.0 1064 2010-10-29 Baltimore New York 2010-10-29 07:00:00 21.0 1142 2010-10-29 Baltimore New York 2010-10-29 07:00:00 -2.0 1599 2010-10-29 @@ -999,6 +1102,29 @@ Cleveland New York 2010-10-29 07:00:00 -4.0 2630 2010-10-29 Cleveland New York 2010-10-29 07:00:00 -19.0 2646 2010-10-29 Cleveland New York 2010-10-29 07:00:00 -12.0 3014 2010-10-29 Washington New York 2010-10-29 07:00:00 1.0 7291 2010-10-29 +Baltimore New York 2010-10-27 07:00:00 -18.0 1064 2010-10-27 +Baltimore New York 2010-10-27 07:00:00 49.0 1142 2010-10-27 +Baltimore New York 2010-10-27 07:00:00 92.0 1599 2010-10-27 +Chicago New York 2010-10-27 07:00:00 148.0 361 2010-10-27 +Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 +Chicago New York 2010-10-27 07:00:00 70.0 1531 2010-10-27 +Chicago New York 2010-10-27 07:00:00 8.0 1610 2010-10-27 +Chicago New York 2010-10-27 07:00:00 21.0 3198 2010-10-27 +Cleveland New York 2010-10-27 07:00:00 16.0 2630 2010-10-27 +Cleveland New York 2010-10-27 07:00:00 27.0 3014 2010-10-27 +Washington New York 2010-10-27 07:00:00 26.0 7291 2010-10-27 +Baltimore New York 2010-10-28 07:00:00 -4.0 1064 2010-10-28 +Baltimore New York 2010-10-28 07:00:00 -14.0 1142 2010-10-28 +Baltimore New York 2010-10-28 07:00:00 -14.0 1599 2010-10-28 +Chicago New York 2010-10-28 07:00:00 2.0 361 2010-10-28 +Chicago New York 2010-10-28 07:00:00 2.0 897 2010-10-28 +Chicago New York 2010-10-28 07:00:00 -11.0 1531 2010-10-28 +Chicago New York 2010-10-28 07:00:00 3.0 1610 2010-10-28 +Chicago New York 2010-10-28 07:00:00 -18.0 3198 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 3.0 2630 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 -6.0 2646 2010-10-28 +Cleveland New York 2010-10-28 07:00:00 1.0 3014 2010-10-28 +Washington New York 2010-10-28 07:00:00 45.0 7291 2010-10-28 Baltimore New York 2010-10-30 07:00:00 14.0 272 2010-10-30 Baltimore New York 2010-10-30 07:00:00 -1.0 1805 2010-10-30 Baltimore New York 2010-10-30 07:00:00 5.0 3171 2010-10-30 @@ -1010,6 +1136,28 @@ Cleveland New York 2010-10-30 07:00:00 -23.0 2018 2010-10-30 Cleveland New York 2010-10-30 07:00:00 -12.0 2932 2010-10-30 Washington New York 2010-10-30 07:00:00 -27.0 5904 2010-10-30 Washington New York 2010-10-30 07:00:00 -16.0 5917 2010-10-30 +Baltimore New York 2010-10-20 07:00:00 -30.0 1064 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 23.0 1142 2010-10-20 +Baltimore New York 2010-10-20 07:00:00 6.0 1599 2010-10-20 +Chicago New York 2010-10-20 07:00:00 42.0 361 2010-10-20 +Chicago New York 2010-10-20 07:00:00 24.0 897 2010-10-20 +Chicago New York 2010-10-20 07:00:00 15.0 1531 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -6.0 1610 2010-10-20 +Chicago New York 2010-10-20 07:00:00 -2.0 3198 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -8.0 2630 2010-10-20 +Cleveland New York 2010-10-20 07:00:00 -15.0 3014 2010-10-20 +Washington New York 2010-10-20 07:00:00 -2.0 7291 2010-10-20 +Baltimore New York 2010-10-22 07:00:00 -12.0 1064 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 54.0 1142 2010-10-22 +Baltimore New York 2010-10-22 07:00:00 18.0 1599 2010-10-22 +Chicago New York 2010-10-22 07:00:00 2.0 361 2010-10-22 +Chicago New York 2010-10-22 07:00:00 24.0 897 2010-10-22 +Chicago New York 2010-10-22 07:00:00 16.0 1531 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -6.0 1610 2010-10-22 +Chicago New York 2010-10-22 07:00:00 -11.0 3198 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 1.0 2630 2010-10-22 +Cleveland New York 2010-10-22 07:00:00 -25.0 2646 2010-10-22 +Cleveland New York 
2010-10-22 07:00:00 -3.0 3014 2010-10-22 Baltimore New York 2010-10-31 07:00:00 -1.0 1599 2010-10-31 Baltimore New York 2010-10-31 07:00:00 -14.0 2571 2010-10-31 Chicago New York 2010-10-31 07:00:00 -25.0 361 2010-10-31 @@ -1018,12 +1166,16 @@ Chicago New York 2010-10-31 07:00:00 -4.0 1531 2010-10-31 Chicago New York 2010-10-31 07:00:00 -22.0 1610 2010-10-31 Chicago New York 2010-10-31 07:00:00 -15.0 3198 2010-10-31 Washington New York 2010-10-31 07:00:00 -18.0 7282 2010-10-31 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1042,46 +1194,102 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 
137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1151,12 +1359,16 @@ Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 Chicago New York 2010-10-25 07:00:00 -1.0 897 2010-10-25 Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1174,12 +1386,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE 
Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -1188,21 +1414,50 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1400,17 +1655,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-30 07%3A00%3A00 POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-31 07%3A00%3A00 #### A masked pattern was here #### -Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 -Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 -Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 -Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 -Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 +Chicago New 
York 2010-10-26 12.0 361 2010-10-26 07:00:00 +Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 +Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 +Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 +Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 +Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 Baltimore New York 2010-10-21 17.0 1064 2010-10-21 07:00:00 Baltimore New York 2010-10-21 105.0 1142 2010-10-21 07:00:00 Baltimore New York 2010-10-21 28.0 1599 2010-10-21 07:00:00 @@ -1423,41 +1680,6 @@ Cleveland New York 2010-10-21 3.0 2630 2010-10-21 07:00:00 Cleveland New York 2010-10-21 29.0 2646 2010-10-21 07:00:00 Cleveland New York 2010-10-21 72.0 3014 2010-10-21 07:00:00 Washington New York 2010-10-21 22.0 7291 2010-10-21 07:00:00 -Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 -Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 -Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 -Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 -Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 -Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 -Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 -Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 -Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 -Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 -Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 -Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 -Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 -Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 -Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 -Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 -Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 -Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 -Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 Baltimore New York 2010-10-25 -25.0 1064 2010-10-25 07:00:00 Baltimore New York 2010-10-25 92.0 1142 2010-10-25 07:00:00 Baltimore New York 2010-10-25 106.0 1599 2010-10-25 07:00:00 @@ -1470,19 +1692,42 @@ Cleveland New York 2010-10-25 -4.0 2630 2010-10-25 07:00:00 Cleveland New York 2010-10-25 81.0 2646 2010-10-25 07:00:00 Cleveland New York 2010-10-25 42.0 3014 2010-10-25 07:00:00 Washington New York 
2010-10-25 9.0 7291 2010-10-25 07:00:00 -Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 -Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 -Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 -Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 -Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 -Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 -Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 +Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 +Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 +Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 +Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 +Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 +Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 +Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 +Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 +Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 +Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 +Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 +Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 +Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 +Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 +Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 +Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 +Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 +Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-27 -18.0 1064 2010-10-27 07:00:00 Baltimore New York 2010-10-27 49.0 1142 2010-10-27 07:00:00 Baltimore New York 2010-10-27 92.0 1599 2010-10-27 07:00:00 @@ -1506,18 +1751,6 @@ Cleveland New York 2010-10-28 3.0 2630 2010-10-28 07:00:00 Cleveland New York 2010-10-28 -6.0 2646 2010-10-28 07:00:00 Cleveland New York 2010-10-28 1.0 3014 2010-10-28 07:00:00 Washington New York 2010-10-28 45.0 7291 2010-10-28 07:00:00 -Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 21.0 1142 
2010-10-29 07:00:00 -Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 -Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 -Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-30 14.0 272 2010-10-30 07:00:00 Baltimore New York 2010-10-30 -1.0 1805 2010-10-30 07:00:00 Baltimore New York 2010-10-30 5.0 3171 2010-10-30 07:00:00 @@ -1529,6 +1762,28 @@ Cleveland New York 2010-10-30 -23.0 2018 2010-10-30 07:00:00 Cleveland New York 2010-10-30 -12.0 2932 2010-10-30 07:00:00 Washington New York 2010-10-30 -27.0 5904 2010-10-30 07:00:00 Washington New York 2010-10-30 -16.0 5917 2010-10-30 07:00:00 +Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 +Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 +Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 +Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 +Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 +Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 +Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 +Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 Baltimore New York 2010-10-31 -1.0 1599 2010-10-31 07:00:00 Baltimore New York 2010-10-31 -14.0 2571 2010-10-31 07:00:00 Chicago New York 2010-10-31 -25.0 361 2010-10-31 07:00:00 @@ -1638,26 +1893,67 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- 2010-10-29 07:00:00 12 2010-10-30 07:00:00 11 2010-10-31 07:00:00 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_timestamp + Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + 
native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_timestamp - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY @@ -1691,17 +1987,19 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-30 07%3A00%3A00 POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10-31 07%3A00%3A00 #### A masked pattern was here #### -Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 -Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 -Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 -Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 -Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 -Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 -Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 -Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 +Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 +Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 +Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 +Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 +Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 +Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -27.0 2646 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 +Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 +Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 Baltimore New York 
2010-10-21 17.0 1064 2010-10-21 07:00:00 Baltimore New York 2010-10-21 105.0 1142 2010-10-21 07:00:00 Baltimore New York 2010-10-21 28.0 1599 2010-10-21 07:00:00 @@ -1714,41 +2012,6 @@ Cleveland New York 2010-10-21 3.0 2630 2010-10-21 07:00:00 Cleveland New York 2010-10-21 29.0 2646 2010-10-21 07:00:00 Cleveland New York 2010-10-21 72.0 3014 2010-10-21 07:00:00 Washington New York 2010-10-21 22.0 7291 2010-10-21 07:00:00 -Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 -Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 -Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 -Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 -Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 -Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 -Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 -Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 -Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 -Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 -Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 -Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 -Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 -Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 -Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 -Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 -Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 -Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 -Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 -Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 -Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 -Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 -Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 -Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 -Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 Baltimore New York 2010-10-25 -25.0 1064 2010-10-25 07:00:00 Baltimore New York 2010-10-25 92.0 1142 2010-10-25 07:00:00 Baltimore New York 2010-10-25 106.0 1599 2010-10-25 07:00:00 @@ -1761,19 +2024,42 @@ Cleveland New York 2010-10-25 -4.0 2630 2010-10-25 07:00:00 Cleveland New York 2010-10-25 81.0 2646 2010-10-25 07:00:00 Cleveland New York 2010-10-25 42.0 3014 2010-10-25 07:00:00 Washington New York 2010-10-25 9.0 7291 2010-10-25 07:00:00 -Baltimore New York 2010-10-26 -22.0 1064 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 123.0 1142 2010-10-26 07:00:00 -Baltimore New York 2010-10-26 90.0 1599 2010-10-26 07:00:00 -Chicago New York 2010-10-26 12.0 361 2010-10-26 07:00:00 -Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 -Chicago New York 2010-10-26 29.0 1531 2010-10-26 07:00:00 -Chicago New York 2010-10-26 -17.0 1610 2010-10-26 07:00:00 -Chicago New York 2010-10-26 6.0 3198 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 4.0 2630 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 
-27.0 2646 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 -11.0 2662 2010-10-26 07:00:00 -Cleveland New York 2010-10-26 13.0 3014 2010-10-26 07:00:00 -Washington New York 2010-10-26 4.0 7291 2010-10-26 07:00:00 +Baltimore New York 2010-10-24 12.0 1599 2010-10-24 07:00:00 +Baltimore New York 2010-10-24 20.0 2571 2010-10-24 07:00:00 +Chicago New York 2010-10-24 10.0 361 2010-10-24 07:00:00 +Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -5.0 1531 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -17.0 1610 2010-10-24 07:00:00 +Chicago New York 2010-10-24 -3.0 3198 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 5.0 2254 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -11.0 2630 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -20.0 2646 2010-10-24 07:00:00 +Cleveland New York 2010-10-24 -9.0 3014 2010-10-24 07:00:00 +Washington New York 2010-10-24 -26.0 7282 2010-10-24 07:00:00 +Baltimore New York 2010-10-23 18.0 272 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 -10.0 1805 2010-10-23 07:00:00 +Baltimore New York 2010-10-23 6.0 3171 2010-10-23 07:00:00 +Chicago New York 2010-10-23 3.0 384 2010-10-23 07:00:00 +Chicago New York 2010-10-23 32.0 426 2010-10-23 07:00:00 +Chicago New York 2010-10-23 1.0 650 2010-10-23 07:00:00 +Chicago New York 2010-10-23 11.0 3085 2010-10-23 07:00:00 +Cleveland New York 2010-10-23 -21.0 2932 2010-10-23 07:00:00 +Washington New York 2010-10-23 -25.0 5832 2010-10-23 07:00:00 +Washington New York 2010-10-23 -21.0 5904 2010-10-23 07:00:00 +Washington New York 2010-10-23 -18.0 5917 2010-10-23 07:00:00 +Washington New York 2010-10-23 -16.0 7274 2010-10-23 07:00:00 +Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 +Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 +Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 +Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 +Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 +Washington New York 2010-10-29 1.0 7291 2010-10-29 07:00:00 Baltimore New York 2010-10-27 -18.0 1064 2010-10-27 07:00:00 Baltimore New York 2010-10-27 49.0 1142 2010-10-27 07:00:00 Baltimore New York 2010-10-27 92.0 1599 2010-10-27 07:00:00 @@ -1797,18 +2083,6 @@ Cleveland New York 2010-10-28 3.0 2630 2010-10-28 07:00:00 Cleveland New York 2010-10-28 -6.0 2646 2010-10-28 07:00:00 Cleveland New York 2010-10-28 1.0 3014 2010-10-28 07:00:00 Washington New York 2010-10-28 45.0 7291 2010-10-28 07:00:00 -Baltimore New York 2010-10-29 -24.0 1064 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 21.0 1142 2010-10-29 07:00:00 -Baltimore New York 2010-10-29 -2.0 1599 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -12.0 361 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -11.0 897 2010-10-29 07:00:00 -Chicago New York 2010-10-29 15.0 1531 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -18.0 1610 2010-10-29 07:00:00 -Chicago New York 2010-10-29 -4.0 3198 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -4.0 2630 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -19.0 2646 2010-10-29 07:00:00 -Cleveland New York 2010-10-29 -12.0 3014 2010-10-29 07:00:00 -Washington New York 2010-10-29 1.0 7291 2010-10-29 
07:00:00 Baltimore New York 2010-10-30 14.0 272 2010-10-30 07:00:00 Baltimore New York 2010-10-30 -1.0 1805 2010-10-30 07:00:00 Baltimore New York 2010-10-30 5.0 3171 2010-10-30 07:00:00 @@ -1820,6 +2094,28 @@ Cleveland New York 2010-10-30 -23.0 2018 2010-10-30 07:00:00 Cleveland New York 2010-10-30 -12.0 2932 2010-10-30 07:00:00 Washington New York 2010-10-30 -27.0 5904 2010-10-30 07:00:00 Washington New York 2010-10-30 -16.0 5917 2010-10-30 07:00:00 +Baltimore New York 2010-10-20 -30.0 1064 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 23.0 1142 2010-10-20 07:00:00 +Baltimore New York 2010-10-20 6.0 1599 2010-10-20 07:00:00 +Chicago New York 2010-10-20 42.0 361 2010-10-20 07:00:00 +Chicago New York 2010-10-20 24.0 897 2010-10-20 07:00:00 +Chicago New York 2010-10-20 15.0 1531 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -6.0 1610 2010-10-20 07:00:00 +Chicago New York 2010-10-20 -2.0 3198 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -8.0 2630 2010-10-20 07:00:00 +Cleveland New York 2010-10-20 -15.0 3014 2010-10-20 07:00:00 +Washington New York 2010-10-20 -2.0 7291 2010-10-20 07:00:00 +Baltimore New York 2010-10-22 -12.0 1064 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 54.0 1142 2010-10-22 07:00:00 +Baltimore New York 2010-10-22 18.0 1599 2010-10-22 07:00:00 +Chicago New York 2010-10-22 2.0 361 2010-10-22 07:00:00 +Chicago New York 2010-10-22 24.0 897 2010-10-22 07:00:00 +Chicago New York 2010-10-22 16.0 1531 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -6.0 1610 2010-10-22 07:00:00 +Chicago New York 2010-10-22 -11.0 3198 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 1.0 2630 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -25.0 2646 2010-10-22 07:00:00 +Cleveland New York 2010-10-22 -3.0 3014 2010-10-22 07:00:00 Baltimore New York 2010-10-31 -1.0 1599 2010-10-31 07:00:00 Baltimore New York 2010-10-31 -14.0 2571 2010-10-31 07:00:00 Chicago New York 2010-10-31 -25.0 361 2010-10-31 07:00:00 @@ -1828,12 +2124,16 @@ Chicago New York 2010-10-31 -4.0 1531 2010-10-31 07:00:00 Chicago New York 2010-10-31 -22.0 1610 2010-10-31 07:00:00 Chicago New York 2010-10-31 -15.0 3198 2010-10-31 07:00:00 Washington New York 2010-10-31 -18.0 7282 2010-10-31 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1852,46 +2152,102 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_timestamp Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 
(type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 0, 1] Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Limit Number 
of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1961,12 +2317,16 @@ Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 Chicago New York 2010-10-25 -1.0 897 2010-10-25 07:00:00 Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 Chicago New York 2010-10-27 -11.0 897 2010-10-27 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1984,12 +2344,26 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_timestamp Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_time (type: timestamp) outputColumnNames: fl_time + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: fl_time (type: timestamp) mode: hash outputColumnNames: _col0, _col1 @@ -1998,21 +2372,50 @@ STAGE PLANS: key expressions: _col0 (type: timestamp) sort order: + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: 
VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
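Every file in this patch changes the same way: the .q tests switch from plain explain to explain vectorization expression, so each regenerated q.out gains a PLAN VECTORIZATION header plus per-operator and per-expression vectorization detail. A minimal sketch of the EXPLAIN variant these golden files exercise, using a table from the hunks above (the other detail levels — SUMMARY, OPERATOR, DETAIL — are assumed from the feature's syntax rather than shown in this diff):

    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT fl_time, COUNT(*)
    FROM flights_tiny_orc_partitioned_timestamp
    GROUP BY fl_time;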
diff --git ql/src/test/results/clientpositive/llap/vector_reduce1.q.out ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
index 4c252c7..0bab7bd 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select b from vectortab2korc order by b
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select b from vectortab2korc order by b
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,25 +132,59 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: b (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_reduce2.q.out ql/src/test/results/clientpositive/llap/vector_reduce2.q.out
index a4ce890..30c4f8e 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce2.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select s, i, s2 from vectortab2korc order by s, i, s2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select s, i, s2 from vectortab2korc order by s, i, s2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,25 +132,59 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: s (type: string), i (type: int), s2 (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [8, 2, 9]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce3.q.out ql/src/test/results/clientpositive/llap/vector_reduce3.q.out index d34113c..21b9844 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,25 +132,59 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution 
mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
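vector_reduce1/2/3 above share one shape: a global ORDER BY forces a single sorted reducer, so the Reduce Sink reports native: false with nativeConditionsNotMet: Uniform Hash IS false, while the ORC scan, the Select, and the reduce-side operators still run vectorized. A hedged sketch of reproducing that plan; both settings are the ones echoed in the enabledConditionsMet / nativeConditionsMet lines above:

    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT s, i, s2 FROM vectortab2korc ORDER BY s, i, s2;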
diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
index 9571b5b..09efd32 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
@@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F
POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test
WHERE cdecimal1 is not null and cdecimal2 is not null
GROUP BY cint, cdouble, cdecimal1, cdecimal2
ORDER BY cint, cdouble, cdecimal1, cdecimal2
LIMIT 50
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test
WHERE cdecimal1 is not null and cdecimal2 is not null
GROUP BY cint, cdouble, cdecimal1, cdecimal2
ORDER BY cint, cdouble, cdecimal1, cdecimal2
LIMIT 50
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -46,11 +50,25 @@ STAGE PLANS:
TableScan
alias: decimal_test
Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:long) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:long) -> boolean) -> boolean
predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(cdecimal1)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10)
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1, col 2, col 3
+ native: false
+ projectedOutputColumns: [0]
keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -59,16 +77,43 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
sort order: ++++
Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false
Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col4 (type: decimal(20,10))
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDecimal(col 4) -> decimal(20,10)
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1, col 2, col 3
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14))
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -76,21 +121,43 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
sort order: ++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col4 (type: decimal(20,10))
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: 
decimal(20,10)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index ca3d2fa..4927f17 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -97,32 +97,78 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Select Operator + expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + 
Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: over1korc - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 20 - ListSink + ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -265,20 +311,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -297,11 +347,25 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [19] + selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter 
, col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 19 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -310,14 +374,40 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -325,20 +415,42 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: 
_col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_struct_in.q.out ql/src/test/results/clientpositive/llap/vector_struct_in.q.out index 0aa1e70..dedc4e6 100644 --- ql/src/test/results/clientpositive/llap/vector_struct_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_struct_in.q.out @@ -22,7 +22,7 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -36,7 +36,7 @@ struct('nine','1'), struct('ten','1') ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -50,15 +50,68 @@ struct('nine','1'), struct('ten','1') ) POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_2] - Output:["_col0","_col1"] - Filter Operator [FIL_4] - predicate:(struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) - TableScan [TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean + predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) + Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + 
Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select * from test_1 where struct(`id`, `lineid`) IN ( @@ -92,7 +145,7 @@ POSTHOOK: Input: default@test_1 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -106,7 +159,7 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -120,13 +173,62 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2"] - TableScan [TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`) IN ( @@ -180,7 +282,7 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -194,7 +296,7 @@ struct(9,1), struct(10,1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -208,15 +310,68 @@ struct(9,1), struct(10,1) ) POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_2] - Output:["_col0","_col1"] - Filter Operator [FIL_4] - predicate:(struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) - TableScan [TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean + predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), lineid (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + 
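For readers replaying these golden files, the struct-IN plans above are straightforward to reproduce. A minimal sketch, assuming a Hive build that understands the explain vectorization syntax exercised throughout this patch and the test_2 table (two int columns) created earlier in this file; the shortened IN list is illustrative only:

  set hive.vectorized.execution.enabled=true;
  explain vectorization expression
  select * from test_2
  where struct(`id`, `lineid`) in (struct(2,3), struct(3,1), struct(1,1));
  -- Expected in the Filter Operator detail, as in the golden output above:
  --   predicateExpression: FilterStructColumnInList(
  --     structExpressions [col 0, col 1],
  --     fieldVectorColumnTypes [LONG, LONG],
  --     structColumnMap [0, 1]) -> boolean

FilterStructColumnInList is the filter-context form; in projection context (the `as b` variants of these queries) the same struct list compiles to StructColumnInList feeding a boolean output column, as the adjacent hunks show.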
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select * from test_2 where struct(`id`, `lineid`) IN ( @@ -250,7 +405,7 @@ POSTHOOK: Input: default@test_2 #### A masked pattern was here #### 1 1 7 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -264,7 +419,7 @@ struct(9,1), struct(10,1) ) as b from test_2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -278,13 +433,62 @@ struct(9,1), struct(10,1) ) as b from test_2 POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2"] - TableScan [TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`) IN ( @@ -338,7 +542,7 @@ POSTHOOK: Input: default@values__tmp__table__3 POSTHOOK: Output: default@test_3 POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -352,7 +556,7 @@ struct('nine',1), struct('ten',1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization expression select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -366,15 +570,68 @@ struct('nine',1), struct('ten',1) ) POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_2] - Output:["_col0","_col1"] - Filter Operator [FIL_4] - predicate:(struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) - TableScan [TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_3 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean + predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select * from test_3 where struct(`id`, `lineid`) IN ( @@ -408,7 +665,7 @@ POSTHOOK: Input: default@test_3 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -422,7 +679,7 @@ struct('nine',1), struct('ten',1) ) as b from test_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -436,13 +693,62 @@ struct('nine',1), struct('ten',1) ) as b from test_3 POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2"] - TableScan 
[TS_0] - Output:["id","lineid"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_3 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`) IN ( @@ -497,7 +803,7 @@ POSTHOOK: Output: default@test_4 POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -512,7 +818,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -527,15 +833,68 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_2] - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_4] - predicate:(struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const 
struct(1,'a',0.5), const struct(3,'b',1.5)) - TableScan [TS_0] - Output:["my_bigint","my_string","my_double"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_4 + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> boolean + predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( @@ -570,7 +929,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_4 #### A masked pattern was here #### 1 a 0.5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -585,7 +944,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -600,13 +959,62 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 POSTHOOK: type: QUERY -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] - Output:["my_bigint","my_string","my_double"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_4 + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Select Operator + expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 4] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9450b8f..1b3e9a5 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -25,7 +25,7 @@ POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- UDFs with varchar support -explain +explain vectorization expression select concat(c1, c2), concat(c3, c4), @@ -33,13 +33,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- UDFs with varchar support -explain +explain vectorization expression select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,15 +58,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringGroupConcatColCol(col 0, col 1) -> 4:String_Family, StringGroupConcatColCol(col 2, col 3) -> 5:String_Family, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringGroupConcatColCol(col 0, col 1) -> 6:String_Family, CastStringGroupToString(col 7)(children: StringGroupConcatColCol(col 2, col 3) -> 7:String_Family) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -70,6 +88,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -94,20 +120,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238val_238 238val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,15 +152,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringUpper(col 1) -> 4:String, StringUpper(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringUpper(col 1) -> 6:String, CastStringGroupToString(col 7)(children: StringUpper(col 3) -> 7:String) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false 
Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -138,6 +182,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -162,20 +214,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### VAL_238 VAL_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,15 +246,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringLower(col 1) -> 4:String, StringLower(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringLower(col 1) -> 6:String, CastStringGroupToString(col 7)(children: StringLower(col 3) -> 7:String) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -206,6 +276,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -231,7 +309,7 @@ POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: -- Scalar UDFs -explain +explain vectorization expression select ascii(c2), ascii(c4), @@ -239,13 +317,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- Scalar UDFs -explain +explain vectorization expression select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -276,6 +358,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> ascii (Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -300,20 +388,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 118 118 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -344,6 +436,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFConcatWS(Const string |, Column[c1], Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -368,20 +466,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238|val_238 238|val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -412,6 +514,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFDecode(GenericUDFEncode(Column[c2], Const string US-ASCII), Const string US-ASCII) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -436,20 +544,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: 
explain +PREHOOK: query: explain vectorization expression select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -480,6 +592,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFInstr(Column[c2], Const string _) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -504,20 +622,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 4 4 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -532,15 +654,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringLength(col 1) -> 4:Long, StringLength(col 3) -> 5:Long, LongColEqualLongColumn(col 6, col 7)(children: StringLength(col 1) -> 6:Long, StringLength(col 3) -> 7:Long) -> 8:long Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -548,6 +684,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -572,20 +716,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 7 7 true -PREHOOK: query: explain +PREHOOK: 
query: explain vectorization expression select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -600,15 +748,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6] + selectExpressions: ConstantVectorExpression(val 5) -> 4:long, ConstantVectorExpression(val 5) -> 5:long, ConstantVectorExpression(val 1) -> 6:long Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -616,6 +778,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -640,20 +810,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 5 5 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -684,6 +858,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFLpad(Column[c2], Const int 15, Const 
string ) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -708,20 +888,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -736,15 +920,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringLTrim(col 1) -> 4:String, StringLTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringLTrim(col 1) -> 6:String, StringLTrim(col 3) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -752,6 +950,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -776,20 +982,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -804,15 +1014,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) 
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 4:Long, VectorUDFAdaptor(c4 regexp 'val') -> 5:Long, LongColEqualLongColumn(col 6, col 7)(children: VectorUDFAdaptor(c2 regexp 'val') -> 6:Long, VectorUDFAdaptor(c4 regexp 'val') -> 7:Long) -> 8:long Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -820,6 +1044,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -844,20 +1076,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -872,15 +1108,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 6:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -888,6 +1138,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -912,20 +1170,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -940,15 +1202,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -956,6 +1232,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -980,20 +1264,24 @@ 
POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### replaced_238 replaced_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1024,6 +1312,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> reverse (Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1048,20 +1342,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 832_lav 832_lav true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1092,6 +1390,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFRpad(Column[c2], Const int 15, Const string ) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1116,20 +1420,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1144,15 +1452,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true 
+ projectedOutputColumns: [4, 5, 8] + selectExpressions: StringRTrim(col 1) -> 4:String, StringRTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringRTrim(col 1) -> 6:String, StringRTrim(col 3) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1160,6 +1482,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1184,18 +1514,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1226,6 +1560,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array<array<string>> of GenericUDFSentences(Const string See spot run. See jane run.)
not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1248,18 +1588,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### [["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1290,6 +1634,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array of GenericUDFSplit(Column[c2], Const string _) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1312,18 +1662,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### ["val","238"] ["val","238"] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1354,6 +1708,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map of GenericUDFStringToMap(Const string a:1,b:2,c:3, Const string ,, Const string :) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1375,21 +1735,25 @@ from varchar_udf_1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} -PREHOOK: query: explain +{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} +PREHOOK: query: explain vectorization expression select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1404,15 +1768,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data 
size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringSubstrColStartLen(col 1, start 0, length 3) -> 4:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringSubstrColStartLen(col 1, start 0, length 3) -> 6:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 7:string) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1420,6 +1798,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1444,20 +1830,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val val true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1472,15 +1862,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringTrim(col 1) -> 4:String, StringTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringTrim(col 1) -> 6:String, StringTrim(col 3) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: 
COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1488,6 +1892,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1513,19 +1925,23 @@ POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: -- Aggregate Functions -explain +explain vectorization expression select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: -- Aggregate Functions -explain +explain vectorization expression select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1558,8 +1974,19 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -1595,18 +2022,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(c2), min(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(c2), min(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1624,31 +2055,73 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3] Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(c2), min(c4) + 
Group By Vectorization: + aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFMinString(col 3) -> string + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), min(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMinString(col 1) -> string + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1676,18 +2149,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select max(c2), max(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select max(c2), max(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1705,31 +2182,73 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3] Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(c2), max(c4) + Group By Vectorization: + aggregators: VectorUDAFMaxString(col 1) -> string, 
VectorUDAFMaxString(col 3) -> string + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFMaxString(col 0) -> string, VectorUDAFMaxString(col 1) -> string + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out index 7d14256..5979f8b 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -143,12 +147,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: name: default.varchar_lazy_binary_columnar Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out index 9c2c536..09259c8 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@varchar_join1_str_orc POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -172,6 +176,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -192,8 +204,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10)) @@ -225,11 +252,15 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,6 +303,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -292,8 +331,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20)) @@ -327,11 +381,15 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with string -explain select * from 
varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with string -explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -374,6 +432,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -394,8 +460,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out index edb67f1..911a962 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -84,8 +88,23 @@ STAGE PLANS: value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) @@ -148,16 +167,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -187,8 +210,23 @@ STAGE PLANS: value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) @@ -254,12 +292,16 @@ create table varchar_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert into table varchar_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -279,36 +321,81 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num 
rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( _col0 AS varchar(25)) (type: varchar(25)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + selectExpressions: CastLongToVarChar(col 0, maxLength 25) -> 1:VarChar Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 5a3cfe4..fbec4e0 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -20,12 +20,16 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@count_case_groupby POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -43,12 +47,27 @@ STAGE PLANS: TableScan alias: count_case_groupby Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) 
WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] + selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:Long Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -57,21 +76,50 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index faceb5c..34e7dca 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -1,7 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -12,7 +12,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
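The vectorization_0.q.out hunks below show Group By Vectorization choosing dedicated aggregators such as VectorUDAFMinLong(col 0) -> tinyint for min(ctinyint). As a rough picture of what such an aggregator does with one batch, here is a minimal standalone sketch; the types are simplified stand-ins, not Hive's actual VectorAggregateExpression API, and null handling, isRepeating shortcuts, and per-key aggregation buffers are omitted.

// Rough sketch of a VectorUDAFMinLong-style aggregator folding one batch.
// Simplified stand-in types for illustration only.
public class MinLongSketch {

  /** Running state for a single aggregation buffer. */
  static final class MinBuffer {
    boolean empty = true;
    long min;

    void aggregate(long value) {
      if (empty || value < min) {
        min = value;
        empty = false;
      }
    }
  }

  /** Fold one column of a batch into the buffer, honoring the selection vector. */
  static void aggregateBatch(MinBuffer buf, long[] vector, int[] sel, int n, boolean selectedInUse) {
    for (int j = 0; j < n; j++) {
      int i = selectedInUse ? sel[j] : j;
      buf.aggregate(vector[i]);
    }
  }

  public static void main(String[] args) {
    MinBuffer buf = new MinBuffer();
    aggregateBatch(buf, new long[] { -64, 62, 7 }, null, 3, false);
    System.out.println(buf.min); // -64
  }
}

The merge side works the same way: in the reduce plan below, the mergepartial Group By Operator runs VectorUDAFMinLong again over the partial results, which is why the same aggregator name appears in both the map-side hash and reduce-side mergepartial blocks.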
-EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -38,43 +42,101 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -106,16 +168,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -134,42 +200,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: 
vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -195,7 +319,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -208,7 +332,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -221,6 +345,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,8 +382,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:tinyint> of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -269,6 +410,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -317,7 +465,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -325,7 +473,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -333,6 +481,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -351,43 +503,101 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce
Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -419,16 +629,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: 
default@alltypesorc
 #### A masked pattern was here ####
 -2147311592 2145498388 9173 12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -447,42 +661,100 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Select Operator
             expressions: cbigint (type: bigint)
             outputColumnNames: cbigint
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [3]
             Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
             Group By Operator
               aggregations: sum(cbigint)
+              Group By Vectorization:
+                  aggregators: VectorUDAFSumLong(col 3) -> bigint
+                  className: VectorGroupByOperator
+                  vectorOutput: true
+                  native: false
+                  projectedOutputColumns: [0]
              mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                sort order: 
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col0 (type: bigint)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Group By Operator
        aggregations: sum(VALUE._col0)
+        Group By Vectorization:
+            aggregators: VectorUDAFSumLong(col 0) -> bigint
+            className: VectorGroupByOperator
+            vectorOutput: true
+            native: false
+            projectedOutputColumns: [0]
        mode: mergepartial
        outputColumnNames: _col0
        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reduce Output Operator
          key expressions: _col0 (type: bigint)
          sort order: +
+          Reduce Sink Vectorization:
+              className: VectorReduceSinkOperator
+              native: false
+              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+              nativeConditionsNotMet: Uniform Hash IS false
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: bigint)
        outputColumnNames: _col0
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0]
        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        File Output Operator
          compressed: false
+          File Sink Vectorization:
+              className: VectorFileSinkOperator
+              native: false
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
          table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -508,7 +780,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1698460028409
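The hunk above shows SUM(cbigint) running fully vectorized on both sides of the shuffle, with the aggregator reported as VectorUDAFSumLong(col 3) -> bigint. As a rough illustration of what such a batch aggregator does — a minimal sketch, not Hive's actual class; the names here are invented — the core loop sums one long column per batch while honoring the batch's null mask:

    public class SumLongSketch {
        private long sum = 0;

        // Consume one batch of n values; isNull mirrors the column's null flags.
        void aggregateBatch(long[] vector, boolean[] isNull, boolean noNulls, int n) {
            if (noNulls) {
                for (int i = 0; i < n; i++) sum += vector[i];
            } else {
                for (int i = 0; i < n; i++) if (!isNull[i]) sum += vector[i];
            }
        }

        public static void main(String[] args) {
            SumLongSketch agg = new SumLongSketch();
            agg.aggregateBatch(new long[] {1, 2, 3}, new boolean[3], true, 3);
            System.out.println(agg.sum); // 6
        }
    }

The branch on noNulls is the point of the sketch: when the column has no nulls, the inner loop carries no per-row conditional at all.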
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -521,7 +793,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -534,6 +806,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -567,8 +843,21 @@ STAGE PLANS:
                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -582,6 +871,13 @@ STAGE PLANS:
          value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -630,7 +926,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -638,7 +934,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -646,6 +942,10 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -664,43 +964,101 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Select Operator
             expressions: cfloat (type: float)
             outputColumnNames: cfloat
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [4]
             Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
             Group By Operator
               aggregations: min(cfloat), max(cfloat), count(cfloat), count()
+              Group By Vectorization:
+                  aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint
+                  className: VectorGroupByOperator
+                  vectorOutput: true
+                  native: false
+                  projectedOutputColumns: [0, 1, 2, 3]
              mode: hash
              outputColumnNames: _col0, _col1, _col2, _col3
              Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                sort order: 
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Group By Operator
        aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+        Group By Vectorization:
+            aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+            className: VectorGroupByOperator
+            vectorOutput: true
+            native: false
+            projectedOutputColumns: [0, 1, 2, 3]
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3
        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
        Reduce Output Operator
          key expressions: _col0 (type: float)
          sort order: +
+          Reduce Sink Vectorization:
+              className: VectorReduceSinkOperator
+              native: false
+              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+              nativeConditionsNotMet: Uniform Hash IS false
          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
          value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
        outputColumnNames: _col0, _col1, _col2, _col3
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0, 1, 2, 3]
        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
        File Output Operator
          compressed: false
+          File Sink Vectorization:
+              className: VectorFileSinkOperator
+              native: false
          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
          table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -732,16 +1090,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64.0 79.553 9173 12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -760,42 +1122,100 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Select Operator
             expressions: cfloat (type: float)
             outputColumnNames: cfloat
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                projectedOutputColumns: [4]
             Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
             Group By Operator
              aggregations: sum(cfloat)
+              Group By Vectorization:
+                  aggregators: VectorUDAFSumDouble(col 4) -> double
+                  className: VectorGroupByOperator
+                  vectorOutput: true
+                  native: false
+                  projectedOutputColumns: [0]
              mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                sort order: 
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: Uniform Hash IS false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col0 (type: double)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Group By Operator
        aggregations: sum(VALUE._col0)
+        Group By Vectorization:
+            aggregators: VectorUDAFSumDouble(col 0) -> double
+            className: VectorGroupByOperator
+            vectorOutput: true
+            native: false
+            projectedOutputColumns: [0]
        mode: mergepartial
        outputColumnNames: _col0
        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reduce Output Operator
          key expressions: _col0 (type: double)
          sort order: +
+          Reduce Sink Vectorization:
+              className: VectorReduceSinkOperator
+              native: false
+              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+              nativeConditionsNotMet: Uniform Hash IS false
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: double)
        outputColumnNames: _col0
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0]
        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        File Output Operator
          compressed: false
+          File Sink Vectorization:
+              className: VectorFileSinkOperator
+              native: false
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
          table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -821,7 +1241,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39479.635992884636
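The MIN/MAX/COUNT plan above makes the two-phase shape explicit: the map side aggregates in mode: hash, the reduce side merges in mode: mergepartial, and the counts are combined with VectorUDAFCountMerge rather than recounted. A hypothetical illustration of that shape — names are invented, not Hive's — is: min of mins, max of maxes, sum of counts:

    public class MinMaxCountPartial {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        long count = 0;

        // Map-side partial aggregation over raw values.
        void add(double v) { min = Math.min(min, v); max = Math.max(max, v); count++; }

        // Reduce-side mergepartial over another task's partial.
        void merge(MinMaxCountPartial o) {
            min = Math.min(min, o.min); max = Math.max(max, o.max); count += o.count;
        }

        public static void main(String[] args) {
            MinMaxCountPartial a = new MinMaxCountPartial(), b = new MinMaxCountPartial();
            a.add(-64.0); a.add(79.553); b.add(0.0);
            a.merge(b);
            System.out.println(a.min + " " + a.max + " " + a.count); // -64.0 79.553 3
        }
    }

Each piece of the partial is a primitive, which is why this query stays fully vectorized end to end, unlike the avg/variance plans around it.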
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cfloat) as c1,
    variance(cfloat),
@@ -834,7 +1254,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cfloat) as c1,
    variance(cfloat),
@@ -847,6 +1267,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -880,8 +1304,21 @@ STAGE PLANS:
                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -895,6 +1332,13 @@ STAGE PLANS:
          value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -944,7 +1388,7 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -971,7 +1415,7 @@ WHERE  (((cstring2 LIKE '%b%')
         AND ((cboolean2 = 1)
              AND (3569 = ctinyint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -998,6 +1442,10 @@ WHERE  (((cstring2 LIKE '%b%')
         AND ((cboolean2 = 1)
              AND (3569 = ctinyint))))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1015,15 +1463,33 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean
            predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean)
            Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint)
              outputColumnNames: cbigint, cfloat, ctinyint
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [3, 4, 0]
              Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
+                    className: VectorGroupByOperator
+                    vectorOutput: false
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1033,8 +1499,21 @@ STAGE PLANS:
                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
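A pattern worth pausing on before the next file: every avg/variance plan above reports vectorOutput: false on the map-side Group By and notVectorizedReason ... Data type struct ... not supported on the reducer, because these aggregators emit STRUCT-typed partials where vectorized output requires PRIMITIVE. The struct is not incidental: a distributed average must ship (count, sum), since averaging per-task averages would weight splits incorrectly. A minimal sketch of the idea — field names are invented, not Hive's actual struct layout:

    public class AvgPartial {
        long count;
        double sum;

        void add(double v) { count++; sum += v; }                 // map side
        void merge(AvgPartial o) { count += o.count; sum += o.sum; } // reduce side
        double finish() { return sum / count; }                    // final result

        public static void main(String[] args) {
            AvgPartial a = new AvgPartial(), b = new AvgPartial();
            a.add(1); a.add(2); b.add(9);   // per-task averages 1.5 and 9.0
            a.merge(b);
            System.out.println(a.finish()); // 4.0, not (1.5 + 9.0) / 2
        }
    }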
diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 8cf503f..f0d2a50 100644
--- ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -35,7 +35,7 @@ LIMIT 40
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -86,15 +90,34 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
            predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
            Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
              outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [10, 0, 8, 4, 6]
              Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+                    className: VectorGroupByOperator
+                    vectorOutput: false
+                    keyExpressions: col 10, col 0, col 8, col 4, col 6
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                    vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -107,8 +130,21 @@ STAGE PLANS:
                  value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
@@ -127,16 +163,33 @@ STAGE PLANS:
          TopN Hash Memory Usage: 0.1
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
        Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
        Limit
          Number of rows: 40
+          Limit Vectorization:
+              className: VectorLimitOperator
+              native: true
          Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -258,7 +311,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0
 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
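The first vectorization_13 query above compares timestamps through CastTimestampToDouble, e.g. (UDFToDouble(ctimestamp1) > 11.0), and the surviving rows carry timestamps like 1969-12-31 16:00:11.912 — 11.912 seconds after the epoch in the US-Pacific zone these tests apparently run in. A rough equivalent of the cast, accurate only to milliseconds (Hive keeps nanosecond precision) and dependent on the JVM's default zone:

    import java.sql.Timestamp;

    public class TimestampAsDouble {
        // Approximation of UDFToDouble(timestamp): seconds since the epoch
        // with a fractional part.
        static double toDouble(Timestamp ts) {
            return ts.getTime() / 1000.0;
        }

        public static void main(String[] args) {
            Timestamp ts = Timestamp.valueOf("1969-12-31 16:00:11.912");
            // In a US-Pacific JVM this prints 11.912, so the row passes > 11.0.
            System.out.println(toDouble(ts));
        }
    }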
 PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -292,7 +345,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5,
 LIMIT 40
 PREHOOK: type: QUERY
 POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -325,6 +378,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -343,15 +400,34 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
            predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
            Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
              outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [10, 0, 8, 4, 6]
              Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+                    className: VectorGroupByOperator
+                    vectorOutput: false
+                    keyExpressions: col 10, col 0, col 8, col 4, col 6
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                    vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -364,8 +440,21 @@ STAGE PLANS:
                  value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
@@ -384,16 +473,33 @@ STAGE PLANS:
          TopN Hash Memory Usage: 0.1
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
        Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
        Limit
          Number of rows: 40
+          Limit Vectorization:
+              className: VectorLimitOperator
+              native: true
          Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
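Both vectorization_13 plans build their predicates from composed FilterExprAndExpr / FilterExprOrExpr nodes, each child marked native: true. Conceptually, AND narrows the set of selected rows child by child, while OR keeps a row if any child accepts it. A toy model over row indices — not Hive's implementation, which rewrites a selection vector in place:

    import java.util.Arrays;
    import java.util.function.IntPredicate;

    public class FilterComposition {
        // AND: each child filter shrinks the surviving selection in turn.
        static int[] and(int[] sel, IntPredicate... children) {
            for (IntPredicate child : children) {
                sel = Arrays.stream(sel).filter(child).toArray();
            }
            return sel;
        }

        // OR: a row survives if any child accepts it.
        static int[] or(int[] sel, IntPredicate... children) {
            return Arrays.stream(sel)
                .filter(i -> Arrays.stream(children).anyMatch(c -> c.test(i)))
                .toArray();
        }

        public static void main(String[] args) {
            double[] cfloat  = {3570, 10, 5};
            double[] cdouble = {1, 10.0, 99};
            int[] sel = {0, 1, 2};
            // (cfloat < 3569) and (10.175 >= cdouble), echoing the plan above
            int[] out = and(sel, i -> cfloat[i] < 3569.0, i -> 10.175 >= cdouble[i]);
            System.out.println(Arrays.toString(out)); // [1]
        }
    }

Ordering the cheap child first means later, more expensive children run over fewer rows, which is one reason the composed form beats a row-at-a-time boolean expression.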
diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index c227e44..e6fdca9 100644
--- ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT ctimestamp1,
        cfloat,
        cstring1,
@@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT ctimestamp1,
        cfloat,
        cstring1,
@@ -68,6 +68,10 @@ WHERE  (((ctinyint <= cbigint)
 GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
 ORDER BY cstring1, cfloat, cdouble, ctimestamp1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -107,8 +111,21 @@ STAGE PLANS:
                  value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
@@ -127,6 +144,13 @@ STAGE PLANS:
          value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double)
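vectorization_14's reducer is blocked the same way, this time by stddev_samp's struct-typed partial. Here the struct genuinely has to carry more than a running value: merging variance partials needs a (count, mean, M2) triple combined with the standard parallel-variance formula. A sketch under that assumption — Hive's actual struct layout and field names may differ:

    public class VariancePartial {
        long n;
        double mean;
        double m2; // sum of squared deviations from the current mean

        // Map-side accumulation (Welford's update).
        void add(double x) {
            n++;
            double d = x - mean;
            mean += d / n;
            m2 += d * (x - mean);
        }

        // Reduce-side combine of two partials.
        void merge(VariancePartial o) {
            if (o.n == 0) return;
            if (n == 0) { n = o.n; mean = o.mean; m2 = o.m2; return; }
            double delta = o.mean - mean;
            long total = n + o.n;
            m2 += o.m2 + delta * delta * n * o.n / total;
            mean += delta * o.n / total;
            n = total;
        }

        double varPop() { return m2 / n; }

        public static void main(String[] args) {
            VariancePartial a = new VariancePartial(), b = new VariancePartial(), all = new VariancePartial();
            double[] xs = {1, 2, 3, 4, 5, 6};
            for (int i = 0; i < xs.length; i++) { (i < 3 ? a : b).add(xs[i]); all.add(xs[i]); }
            a.merge(b);
            // Merged and sequential results agree (up to floating-point rounding).
            System.out.println(a.varPop() + " vs " + all.varPop());
        }
    }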
diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 768aed4..a9908a4 100644
--- ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cboolean1,
        cdouble,
@@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cboolean1,
        cdouble,
@@ -64,6 +64,10 @@ WHERE  (((cstring2 LIKE '%ss%')
 GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -103,8 +107,21 @@ STAGE PLANS:
                  value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5)
@@ -123,6 +140,13 @@ STAGE PLANS:
          value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
 Reducer 3 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index a1eb629..22041cc 100644
--- ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -42,6 +42,10 @@ WHERE  ((cstring2 LIKE '%b%')
         OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -80,8 +84,21 @@ STAGE PLANS:
                  value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: false
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported
+        vectorized: false
    Reduce Operator Tree:
      Group By Operator
        aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out
index 3a77cc9..007ce8f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_17.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cstring1,
        cint,
@@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
        cstring1,
        cint,
@@ -50,6 +50,10 @@ WHERE  (((cbigint > -23)
         OR (cfloat = cdouble))))
 ORDER BY cbigint, cfloat
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -81,8 +85,23 @@ STAGE PLANS:
                  value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 54cc498..aeec133 100644
--- ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -29,7 +29,7 @@ LIMIT 25
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -56,6 +56,10 @@ WHERE  ((ctinyint != 0)
 ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -73,32 +77,74 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
            predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
            Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
+                  selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
              Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                sort order: +++++++++++++++
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
                TopN Hash Memory Usage: 0.1
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
        Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
        Limit
          Number of rows: 25
+          Limit Vectorization:
+              className: VectorLimitOperator
+              native: true
          Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,7 +239,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0
 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0
 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0
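The selectExpressions in the plan above show the integer arithmetic evaluated into long scratch columns, e.g. LongColModuloLongScalar(col 1, val -257) -> 14:long for (csmallint % -257). The result rows make the semantics easy to check by hand: for csmallint = -7196 and ctinyint = 1 the row reads ... 0 7196 -1 16 ... -1 0, which plain Java long arithmetic reproduces (the column names are from the query; the class is just a check):

    public class ScratchColumnArithmetic {
        public static void main(String[] args) {
            long csmallint = -7196, ctinyint = 1;
            System.out.println(csmallint % -257);       // 0   (257 * 28 == 7196)
            System.out.println(-csmallint);             // 7196
            System.out.println(-ctinyint + 17);         // 16
            System.out.println((-ctinyint) % ctinyint); // 0
        }
    }

Like Java, Hive's % keeps the dividend's sign, so the -257 divisor only affects the magnitude test, not the sign of the result.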
 PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -221,7 +267,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2,
 LIMIT 25
 PREHOOK: type: QUERY
 POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -248,6 +294,10 @@ WHERE  ((ctinyint != 0)
 ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -265,32 +315,74 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
            predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean)
            Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
+                  selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
              Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                sort order: +++++++++++++++
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
                TopN Hash Memory Usage: 0.1
     Execution mode: vectorized, llap
     LLAP IO: all inputs
+    Map Vectorization:
+        enabled: true
+        enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+        groupByVectorOutput: true
+        inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
 Reducer 2 
     Execution mode: vectorized, llap
+    Reduce Vectorization:
+        enabled: true
+        enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+        groupByVectorOutput: true
+        allNative: false
+        usesVectorUDFAdaptor: false
+        vectorized: true
    Reduce Operator Tree:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+        Select Vectorization:
+            className: VectorSelectOperator
+            native: true
+            projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
        Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
        Limit
          Number of rows: 25
+          Limit Vectorization:
+              className: VectorLimitOperator
+              native: true
          Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -180,7 +226,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -206,7 +252,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -231,6 +277,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -248,32 +298,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit 
Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index a1eb629..22041cc 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -80,8 +84,21 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out index 9a6cb52..74455f5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out @@ -12,28 +12,78 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_decimal_test + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean + predicate: (cint is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date), cdecimal (type: decimal(20,10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_decimal_test - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: 
query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 2078e81..872e7f3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,8 +82,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out index 6bd4bd6..6324e01 100644 --- ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
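A note on the native: true filters in the plans above: a predicate such as FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) from the vectorization_decimal_date plan is evaluated against an entire VectorizedRowBatch by narrowing the batch's selection vector, not by testing rows one at a time. The following is a minimal, self-contained Java sketch of that selection-vector mechanic; the Batch type and method names are simplified stand-ins for illustration, not the real org.apache.hadoop.hive.ql.exec.vector classes.

    import java.util.Arrays;

    public class SelectionVectorSketch {
      /** Hypothetical, simplified stand-in for VectorizedRowBatch. */
      static class Batch {
        int size;                 // number of live rows
        int[] selected;           // indices of live rows when selectedInUse
        boolean selectedInUse;
        boolean[][] isNull;       // isNull[col][row]
      }

      /** Keeps only rows where the column is not null (cf. SelectColumnIsNotNull). */
      static void selectColumnIsNotNull(Batch b, int col) {
        int newSize = 0;
        if (b.selectedInUse) {
          for (int j = 0; j < b.size; j++) {
            int i = b.selected[j];
            if (!b.isNull[col][i]) b.selected[newSize++] = i;
          }
        } else {
          b.selected = new int[b.size];
          for (int i = 0; i < b.size; i++) {
            if (!b.isNull[col][i]) b.selected[newSize++] = i;
          }
          b.selectedInUse = true;
        }
        b.size = newSize;         // each filter only ever shrinks the selection
      }

      public static void main(String[] args) {
        Batch b = new Batch();
        b.size = 4;
        b.isNull = new boolean[][] {
          {false, true,  false, false},   // col 0, e.g. cint
          {false, false, true,  false}    // col 1, e.g. cdouble
        };
        // An AND of filters is just sequential application of the children.
        selectColumnIsNotNull(b, 0);
        selectColumnIsNotNull(b, 1);
        System.out.println(Arrays.toString(Arrays.copyOf(b.selected, b.size))); // [0, 3]
      }
    }

Because each child expression only rewrites the shared selection vector, composing them under FilterExprAndExpr needs no per-row branching, which is why these filter operators can be reported as fully native.
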
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -39,8 +43,21 @@ STAGE PLANS: value expressions: _col0 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index a14d515..d6c405e 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,6 +136,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -151,15 +157,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> 
boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 1, 4, 0] Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE @@ -169,8 +193,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
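The groupByVectorOutput: false above, and the notVectorizedReason reported next for Reducer 2, both trace back to the same limitation: avg and the stddev/var family ship multi-field partial results (struct-typed columns) from map to reduce, while vectorized batches only carry primitive-typed columns. A rough Java illustration of why AVG's map-side partial must be a struct rather than a single double follows; the AvgPartial helper is hypothetical, not Hive's actual GenericUDAFAverage implementation.

    public class AvgPartialSketch {
      /** Partial state for AVG: map tasks must emit (sum, count) pairs,
          because merging per-split averages alone would be wrong. */
      static final class AvgPartial {
        double sum;
        long count;
        void add(double v) { sum += v; count++; }
        void merge(AvgPartial other) { sum += other.sum; count += other.count; }
        double finish() { return sum / count; }
      }

      public static void main(String[] args) {
        AvgPartial split1 = new AvgPartial();       // values 1, 2, 3
        for (double v : new double[] {1, 2, 3}) split1.add(v);
        AvgPartial split2 = new AvgPartial();       // value 10
        split2.add(10);

        split1.merge(split2);
        System.out.println(split1.finish());        // 4.0 == (1+2+3+10)/4

        // Averaging the per-split averages instead would give (2.0 + 10.0) / 2 = 6.0.
        // The count field is what makes the merge correct, and it is exactly this
        // extra field that forces a STRUCT intermediate type.
      }
    }

Merging (sum, count) pairs is associative and order-independent, but merging bare averages is not; that struct intermediate is what the reduce-side vectorizer rejects with "Data type struct ... not supported", forcing the reducer back to row mode.
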
Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) @@ -277,7 +314,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -315,7 +353,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -348,6 +387,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -365,15 +408,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2036734 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 5, 0] Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE @@ -383,8 +444,21 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) @@ -485,7 +559,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -522,7 +597,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) 
- (-(VAR_POP(cbigint)))), COUNT(*), @@ -554,6 +630,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -571,15 +651,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2, 5] Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE @@ -589,8 +687,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) Execution mode: vectorized, 
llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) @@ -690,7 +801,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -717,7 +829,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -739,6 +852,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -756,15 +873,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 2, 4] Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE @@ -774,8 +909,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) @@ -855,7 +1003,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -896,7 +1045,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -932,6 +1082,10 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -949,32 +1103,74 @@ STAGE PLANS: TableScan alias: 
alltypesorc Statistics: Num rows: 12288 Data size: 3056470 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) Statistics: Num rows: 9898 Data size: 2462086 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30] + selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 
19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(20,18) Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18)) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: 
decimal(20,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1118,7 +1314,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1158,7 +1355,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1193,6 +1391,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1210,32 +1412,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), 
ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28] + selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21] Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1330,7 +1574,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1369,7 +1614,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, 
GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1403,6 +1649,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1420,33 +1670,75 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) Statistics: Num rows: 10922 Data size: 2312410 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: 
[2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28] + selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(19,18), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), 
KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 75 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1612,7 +1904,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1644,7 +1937,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1671,6 +1965,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1688,33 +1986,75 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2528254 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) 
-> boolean) -> boolean) -> boolean predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) Statistics: Num rows: 3868 Data size: 795962 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24] + selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 45 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1835,7 +2175,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1860,7 +2201,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1880,6 +2222,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1898,15 +2244,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean predicate: 
((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: csmallint, cbigint, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 0] Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1919,8 +2284,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) @@ -1939,16 +2317,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + 
native: true Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2031,7 +2426,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2063,7 +2459,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2090,6 +2487,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2108,15 +2509,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 4] Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + Group By Vectorization: + aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> 
bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 5 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -2129,8 +2549,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) @@ -2149,13 +2582,27 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 870 Data size: 109608 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2232,7 +2679,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2292,7 +2740,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2347,6 +2796,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2365,15 +2818,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), 
cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 8, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2386,8 +2858,21 @@ STAGE PLANS: value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: 
stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) @@ -2406,16 +2891,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2597,7 +3099,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION 
+SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2642,7 +3145,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2682,6 +3186,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2700,15 +3208,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 
2) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2721,8 +3248,21 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) @@ -2741,13 +3281,27 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 
(type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24]
                Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2856,12 +3410,16 @@ create table test_count(i int) stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_count
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(*) from test_count
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(*) from test_count
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -2879,29 +3437,71 @@ STAGE PLANS:
                TableScan
                  alias: test_count
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                  Select Operator
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: []
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2923,12 +3523,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_count
#### A masked pattern was here ####
0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(i) from test_count
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(i) from test_count
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -2946,31 +3550,73 @@ STAGE PLANS:
                TableScan
                  alias: test_count
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                  Select Operator
                    expressions: i (type: int)
                    outputColumnNames: i
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Group By Operator
                      aggregations: count(i)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3066,12 +3712,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F
POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(*) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(*) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3089,29 +3739,71 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: []
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3133,12 +3825,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesnullorc
#### A masked pattern was here ####
12288
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(ctinyint) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(ctinyint) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3156,31 +3852,73 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
                    expressions: ctinyint (type: tinyint)
                    outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(ctinyint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3202,12 +3940,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesnullorc
#### A masked pattern was here ####
0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(cint) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(cint) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3225,31 +3967,73 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
                    expressions: cint (type: int)
                    outputColumnNames: cint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2]
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(cint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 2) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3271,12 +4055,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesnullorc
#### A masked pattern was here ####
0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(cfloat) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(cfloat) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3294,31 +4082,73 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
                    expressions: cfloat (type: float)
                    outputColumnNames: cfloat
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(cfloat)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 4) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3340,12 +4170,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesnullorc
#### A masked pattern was here ####
0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(cstring1) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(cstring1) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3363,31 +4197,73 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
                    expressions: cstring1 (type: string)
                    outputColumnNames: cstring1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [6]
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(cstring1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 6) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3409,12 +4285,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesnullorc
#### A masked pattern was here ####
0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select count(cboolean1) from alltypesnullorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select count(cboolean1) from alltypesnullorc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -3432,31 +4312,73 @@ STAGE PLANS:
                TableScan
                  alias: alltypesnullorc
                  Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Select Operator
                    expressions: cboolean1 (type: boolean)
                    outputColumnNames: cboolean1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [10]
                    Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count(cboolean1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 10) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 6c6c6d6..14606ed 100644
--- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc
POSTHOOK: Output: default@vsmb_bucket_txt
POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -117,33 +121,71 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: int)
                      sort order: +
                      Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: int)
                      sort order: +
                      Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
@@ -187,12 +229,16 @@ POSTHOOK: Input: default@vsmb_bucket_2
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -210,17 +256,36 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: int)
                      sort order: +
                      Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
                TableScan
@@ -237,6 +302,10 @@ STAGE PLANS:
                      value expressions: value (type: string)
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
@@ -285,7 +354,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-explain
+explain vectorization expression
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
@@ -293,9 +362,13 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-explain
+explain vectorization expression
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -313,17 +386,36 @@ STAGE PLANS:
                TableScan
                  alias: a
                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: int)
                      sort order: +
                      Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
                TableScan
@@ -340,6 +432,10 @@ STAGE PLANS:
                      value expressions: value (type: string)
            Execution mode: llap
            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 6e13369..127c2c3 100644
--- ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select 
  csmallint,
  case 
@@ -16,7 +16,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select 
  csmallint,
  case 
@@ -34,6 +34,10 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -48,15 +52,30 @@ STAGE PLANS:
                TableScan
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                    Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string)
                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 14, 15]
+                          selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String
                      Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                        Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -64,6 +83,14 @@ STAGE PLANS:
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true

  Stage: Stage-0
    Fetch Operator
@@ -113,7 +140,7 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select 
  csmallint,
  case 
@@ -131,7 +158,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select 
  csmallint,
  case 
@@ -149,6 +176,10 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -163,15 +194,30 @@ STAGE PLANS:
                TableScan
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                    Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string)
                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 14, 15]
+                          selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String
                      Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                        Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -179,6 +225,14 @@ STAGE PLANS:
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true

  Stage: Stage-0
    Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
index a95702d..0fb8552 100644
--- ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
@@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
-- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
-- Test type casting in vectorized mode to verify end-to-end functionality.
-explain
+explain vectorization
select
-- to boolean
  cast (ctinyint as boolean)
@@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
-- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
-- Test type casting in vectorized mode to verify end-to-end functionality.
-explain
+explain vectorization
select
-- to boolean
  cast (ctinyint as boolean)
@@ -156,6 +156,10 @@ from alltypesorc
-- limit output to a reasonably small number of rows
where cbigint % 250 = 0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -186,6 +190,14 @@ STAGE PLANS:
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true

  Stage: Stage-0
    Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vectorized_context.q.out ql/src/test/results/clientpositive/llap/vectorized_context.q.out
index 1f70a01..855a50f 100644
--- ql/src/test/results/clientpositive/llap/vectorized_context.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_context.q.out
@@ -82,20 +82,24 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
POSTHOOK: Output: default@household_demographics
POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
select store.s_city, ss_net_profit
from store_sales
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
select store.s_city, ss_net_profit
from store_sales
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -128,6 +132,14 @@ STAGE PLANS:
                      value expressions: _col1 (type: int), _col2 (type: double)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 2
            Map Operator Tree:
                TableScan
@@ -176,6 +188,14 @@ STAGE PLANS:
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
                TableScan
@@ -195,6 +215,14 @@ STAGE PLANS:
                      Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true

  Stage: Stage-0
    Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
index f45e730..e2999a5 100644
--- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
@@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
2010-10-31 2010-10-31 07:00:00
2010-10-31 2010-10-31 07:00:00
2010-10-31 2010-10-31 07:00:00
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  to_unix_timestamp(fl_time),
  year(fl_time),
  month(fl_time),
@@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN SELECT
  datediff(fl_time, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  to_unix_timestamp(fl_time),
  year(fl_time),
  month(fl_time),
@@ -233,20 +233,62 @@ POSTHOOK: query: EXPLAIN SELECT
  datediff(fl_time, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+                        selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1) -> 2:long, VectorUDFYearTimestamp(col 1, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 1, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 8:long, CastTimestampToDate(col 1) -> 9:date, VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateAddColScalar(col 1, val 2) -> 11:date, VectorUDFDateSubColScalar(col 1, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 13:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-            ListSink
+        ListSink

PREHOOK: query: SELECT
  to_unix_timestamp(fl_time),
@@ -419,7 +461,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  to_unix_timestamp(fl_date),
  year(fl_date),
  month(fl_date),
@@ -434,7 +476,7 @@ PREHOOK: query: EXPLAIN SELECT
  datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  to_unix_timestamp(fl_date),
  year(fl_date),
  month(fl_date),
@@ -449,20 +491,62 @@ POSTHOOK: query: EXPLAIN SELECT
  datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12]
+                        selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long, VectorUDFMonthDate(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 8:long, VectorUDFDateLong(col 0) -> 9:date, VectorUDFDateAddColScalar(col 0, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 12:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-            ListSink
+        ListSink

PREHOOK: query: SELECT
  to_unix_timestamp(fl_date),
@@ -635,7 +719,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  year(fl_time) = year(fl_date),
  month(fl_time) = month(fl_date),
  day(fl_time) = day(fl_date),
@@ -649,7 +733,7 @@ PREHOOK: query: EXPLAIN SELECT
  datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  year(fl_time) = year(fl_date),
  month(fl_time) = month(fl_date),
  day(fl_time) = day(fl_date),
@@ -663,20 +747,62 @@ POSTHOOK: query: EXPLAIN SELECT
  datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 6, 7, 8, 9, 2, 3, 12, 13, 16]
+                        selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 1, field YEAR) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 1, field MONTH) -> 2:long, VectorUDFMonthDate(col 0, field MONTH) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 2:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 3:long) -> 9:long, LongColEqualLongColumn(col 10, col 0)(children: CastTimestampToDate(col 1) -> 10:date) -> 2:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateLong(col 0) -> 11:date) -> 3:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateAddColScalar(col 1, val 2) -> 10:date, VectorUDFDateAddColScalar(col 0, val 2) -> 11:date) -> 12:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateSubColScalar(col 1, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date) -> 13:long, LongColEqualLongColumn(col 14, col 15)(children: VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 14:long, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 15:long) -> 16:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-            ListSink
+        ListSink

PREHOOK: query: -- Should all be true or NULL
SELECT
@@ -849,7 +975,7 @@ true true true true true true true true true true true
true true true true true true true true true true true
true true true true true true true true true true true
true true true true true true true true true true true
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  fl_date,
  to_date(date_add(fl_date, 2)),
  to_date(date_sub(fl_date, 2)),
@@ -858,7 +984,7 @@ PREHOOK: query: EXPLAIN SELECT
  datediff(date_add(fl_date, 2), date_sub(fl_date, 2))
FROM date_udf_flight_orc LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  fl_date,
  to_date(date_add(fl_date, 2)),
  to_date(date_sub(fl_date, 2)),
@@ -867,22 +993,68 @@ POSTHOOK: query: EXPLAIN SELECT
  datediff(date_add(fl_date, 2), date_sub(fl_date, 2))
FROM date_udf_flight_orc LIMIT 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 3, 4, 5, 6, 8]
+                        selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
+                      Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
  Stage: Stage-0
    Fetch Operator
      limit: 10
      Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-            Limit
-              Number of rows: 10
-              ListSink
+        ListSink

PREHOOK: query: SELECT
  fl_date,
@@ -927,7 +1099,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@date_udf_flight_orc
#### A masked pattern was here ####
2009-07-30
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  min(fl_date) AS c1,
  max(fl_date),
  count(fl_date),
@@ -935,7 +1107,7 @@ PREHOOK: query: EXPLAIN SELECT
FROM date_udf_flight_orc ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
  min(fl_date) AS c1,
  max(fl_date),
  count(fl_date),
@@ -943,6 +1115,10 @@ POSTHOOK: query: EXPLAIN SELECT
FROM date_udf_flight_orc ORDER BY c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -961,43 +1137,101 @@ STAGE PLANS:
                TableScan
                  alias: date_udf_flight_orc
                  Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Select Operator
                    expressions: fl_date (type: date)
                    outputColumnNames: fl_date
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                    Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: min(fl_date), max(fl_date), count(fl_date), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: date)
                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index ced9795..0b5d516 100644
--- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -16,10 +16,14 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dtest
POSTHOOK: Lineage: dtest.a SCRIPT []
POSTHOOK: Lineage: dtest.b SIMPLE []
-PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest
+PREHOOK: query: explain vectorization select sum(distinct a), 
count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,8 +61,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -88,10 +107,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,8 +149,23 @@ STAGE PLANS: Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -145,6 +183,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), 
count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 76c8404..c2e1dfd 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 11 12 -PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds POSTHOOK: type: CREATETABLE_AS_SELECT +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -74,8 +78,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -199,11 +214,15 @@ POSTHOOK: Output: default@srcpart_double_hour POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] PREHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -234,6 +253,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ 
-269,6 +292,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -290,6 +321,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -329,10 +367,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -363,6 +405,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -383,6 +429,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -404,6 +458,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -457,13 +518,17 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) 
from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -495,6 +560,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -530,6 +599,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -565,6 +642,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -602,6 +687,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -645,12 +737,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -683,6 +779,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -703,6 +803,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -723,6 +831,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -760,6 +876,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -815,11 +938,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -849,6 +976,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ 
-899,6 +1030,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -920,6 +1059,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -959,10 +1105,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -993,6 +1143,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1013,6 +1167,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1034,6 +1196,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) @@ -1085,11 +1254,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1120,6 +1293,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1155,6 +1332,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1176,6 +1361,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1215,10 +1407,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1249,6 +1445,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1269,6 +1469,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1290,6 +1498,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1339,11 +1554,15 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 0 PREHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1374,6 +1593,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1409,6 +1632,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1430,6 +1661,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1469,10 +1707,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) 
from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1503,6 +1745,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1538,6 +1784,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1559,6 +1813,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1598,10 +1859,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1632,6 +1897,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1652,6 +1921,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1673,6 +1950,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1712,10 +1996,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1746,6 +2034,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1766,6 +2058,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1787,6 +2087,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1839,10 +2146,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked 
pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1873,6 +2184,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1908,6 +2223,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1929,6 +2252,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1983,11 +2313,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 1000 Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2015,6 +2349,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 94000 Basic 
stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2035,6 +2373,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2056,6 +2398,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2071,6 +2420,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -2118,11 +2474,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 1000 Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY POSTHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2151,6 +2511,10 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2170,6 +2534,14 @@ STAGE PLANS: value expressions: _col0 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2197,6 +2569,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2238,11 +2617,15 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 1500 PREHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY POSTHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2272,6 +2655,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2322,6 +2709,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2343,6 +2738,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2383,11 +2785,15 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' 
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2418,6 +2824,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2453,6 +2863,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2474,6 +2892,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2494,10 +2919,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2546,6 +2975,14 @@ STAGE PLANS: Target Vertex: Map 4 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2562,6 +2999,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2583,6 +3024,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2604,11 +3052,15 @@ STAGE PLANS: ListSink PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2638,6 +3090,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -2673,6 +3129,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2694,6 +3158,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2715,13 +3186,17 @@ STAGE PLANS: ListSink PREHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY POSTHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join 
srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2753,6 +3228,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -2788,6 +3267,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -2808,6 +3295,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2845,6 +3340,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2884,12 +3386,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2925,6 +3431,12 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 5 Map Operator Tree: TableScan @@ -2945,6 
+3457,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -2965,6 +3485,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3002,6 +3530,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3038,11 +3573,15 @@ POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 0 PREHOOK: query: -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY POSTHOOK: query: -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3075,6 +3614,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -3095,6 +3638,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 7 Map Operator Tree: TableScan @@ -3115,6 +3662,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3136,6 +3687,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) 
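(For reference: the enabledConditionsMet / enabledConditionsNotMet entries in the Map Vectorization and Reduce Vectorization blocks report on session-level properties. Below is a minimal sketch of a session that reproduces both outcomes seen in these hunks — map vectorization on for the ORC-backed tables, off for the TextInputFormat-backed srcpart. All property, table, and engine names are taken verbatim from the plan output above; the exact report still depends on the Hive version producing it.)

    -- Plan-level switch reported under "PLAN VECTORIZATION".
    SET hive.vectorized.execution.enabled=true;
    -- Reduce-side switch; the report also requires hive.execution.engine
    -- to be tez or spark.
    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.execution.engine=tez;
    -- Left false, so text tables report "enabledConditionsNotMet:
    -- hive.vectorized.use.vector.serde.deserialize IS false".
    SET hive.vectorized.use.vector.serde.deserialize=false;

    EXPLAIN VECTORIZATION
    SELECT count(*) FROM srcpart
    JOIN srcpart_date ON (srcpart.ds = srcpart_date.ds)
    WHERE srcpart_date.`date` = '2008-04-08';
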
Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3151,6 +3709,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3187,6 +3752,13 @@ STAGE PLANS: Target Vertex: Map 1 Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3247,10 +3819,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2000 -PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3283,6 +3859,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -3303,6 +3883,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 7 Map Operator Tree: TableScan @@ -3323,6 +3907,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3346,6 +3934,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution 
mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3361,6 +3956,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3397,6 +3999,13 @@ STAGE PLANS: Target Vertex: Map 1 Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3458,10 +4067,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2008-04-08 2008-04-09 -PREHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3496,6 +4109,10 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 10 Map Operator Tree: TableScan @@ -3516,6 +4133,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -3534,6 +4155,10 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 7 Map Operator Tree: TableScan @@ -3554,8 +4179,19 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 11 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3607,6 +4243,13 @@ STAGE PLANS: Target Vertex: Map 5 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3638,6 +4281,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3651,6 +4301,13 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3732,11 +4389,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 2008-04-09 2008-04-09 PREHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3780,6 +4441,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -3815,8 +4480,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: 
all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3870,13 +4550,17 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3929,6 +4613,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -3964,6 +4652,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -3999,8 +4695,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4056,11 +4767,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from 
srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4103,6 +4818,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4153,8 +4872,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4206,11 +4940,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4254,6 +4992,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4289,8 +5031,23 @@ 
STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4331,11 +5088,15 @@ POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 0 PREHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4379,6 +5140,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4414,8 +5179,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4455,10 +5235,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_double_hour #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 +POSTHOOK: 
query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4502,6 +5286,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4537,8 +5325,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4594,12 +5397,16 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4626,6 +5433,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -4646,8 +5457,19 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -4676,6 +5498,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4724,11 +5553,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4772,6 +5605,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -4807,8 +5644,23 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4829,10 +5681,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on 
stages: Stage-1 @@ -4879,6 +5735,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -4895,8 +5759,19 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4918,11 +5793,15 @@ STAGE PLANS: ListSink PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4952,6 +5831,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -4985,8 +5868,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5008,13 +5906,17 @@ STAGE PLANS: ListSink PREHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart 
join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY POSTHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5067,6 +5969,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -5102,6 +6008,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -5122,8 +6036,23 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5163,12 +6092,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends 
on stages: Stage-1 @@ -5203,6 +6136,12 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 2 Map Operator Tree: TableScan @@ -5246,6 +6185,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -5266,8 +6213,23 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5304,11 +6266,15 @@ POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 0 PREHOOK: query: -- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY POSTHOOK: query: -- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5356,6 +6322,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -5376,6 +6346,10 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 6 Map Operator Tree: TableScan @@ -5396,8 +6370,19 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -5413,6 +6398,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -5449,6 +6441,13 @@ STAGE PLANS: Target Vertex: Map 1 Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -5547,10 +6546,14 @@ POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(s POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 1bab6f7..a7c0d10 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,12 +32,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -41,6 +56,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -48,9 +67,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 2, 12] + selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,27 +91,63 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out index 0a81f62..dbc2a78 100644 --- ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
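(The tests that follow switch from the summary form to EXPLAIN VECTORIZATION EXPRESSION, which additionally prints per-operator vectorization classes and expression trees, including VectorUDFAdaptor entries for functions that lack a native vectorized implementation. A minimal sketch contrasting the two forms, reusing the alltypesorc table and column names that appear throughout these hunks:)

    -- Summary form: PLAN / Map / Reduce Vectorization blocks only.
    EXPLAIN VECTORIZATION
    SELECT count(cint) FROM alltypesorc WHERE cint IS NOT NULL;

    -- EXPRESSION form adds operator-level detail such as
    --   Filter Vectorization: className: VectorFilterOperator
    --   predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
    -- and flags adaptor fallbacks, e.g. VectorUDFAdaptor(hex(cdouble)).
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT count(cint) FROM alltypesorc WHERE cint IS NOT NULL;
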
-explain
+explain vectorization expression
 select cdouble
 ,Round(cdouble, 2)
@@ -106,22 +106,69 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause
 and sin(cfloat) >= -1.0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean
+                    predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean)
+                    Statistics: Num rows: 2048 Data size: 48960 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49]
+                          selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:Double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:String, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:String, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double
+                      Statistics: Num rows: 2048 Data size: 1724272 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 2048 Data size: 1724272 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean)
-            Select Operator
-              expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40
-              ListSink
+        ListSink
 
 PREHOOK: query: select cdouble
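The hunk above switches this golden file from plain explain to the new explain vectorization syntax at the expression detail level. As a hedged sketch of the syntax exercised across these files (the three variants below all appear in the hunks; other detail levels may exist in the feature, so treat this as illustrative only):

    explain vectorization select cdouble, round(cdouble, 2) from alltypesorc;
    explain vectorization expression select cdouble, round(cdouble, 2) from alltypesorc;
    explain vectorization extended select cdouble, round(cdouble, 2) from alltypesorc;

The bare form prints only the PLAN VECTORIZATION summary; expression and extended additionally annotate operators with blocks such as Select Vectorization and its selectExpressions.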
diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
index 800cbb6..93a68e9 100644
--- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
@@ -1,11 +1,15 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -66,6 +70,14 @@ STAGE PLANS:
                         value expressions: _col0 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -86,6 +98,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: smallint), _col2 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 4 
            Map Operator Tree:
                TableScan
@@ -105,8 +125,23 @@ STAGE PLANS:
                           Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 8345132..6285c99 100644
--- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -46,16 +46,20 @@ POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldS
 POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
 POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
 POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
-PREHOOK: query: explain select *
+PREHOOK: query: explain vectorization select *
   from alltypes_parquet
   where cint = 528534767
   limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select *
+POSTHOOK: query: explain vectorization select *
   from alltypes_parquet
   where cint = 528534767
   limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -99,7 +103,7 @@ POSTHOOK: Input: default@alltypes_parquet
 528534767	27	-7824	27.0	-7824.0	cvLH6Eat2yFsyy7p
 528534767	-11	-15431	-11.0	-15431.0	cvLH6Eat2yFsyy7p
 528534767	61	-15549	61.0	-15549.0	cvLH6Eat2yFsyy7p
-PREHOOK: query: explain select ctinyint,
+PREHOOK: query: explain vectorization select ctinyint,
   max(cint),
   min(csmallint),
   count(cstring1),
@@ -108,7 +112,7 @@ PREHOOK: query: explain select ctinyint,
   from alltypes_parquet
   group by ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select ctinyint,
+POSTHOOK: query: explain vectorization select ctinyint,
   max(cint),
   min(csmallint),
   count(cstring1),
@@ -117,6 +121,10 @@ POSTHOOK: query: explain select ctinyint,
   from alltypes_parquet
   group by ctinyint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -152,8 +160,17 @@ STAGE PLANS:
                         value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
         Reducer 2 
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index b49d5dd..3c977f1 100644
--- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -118,12 +118,12 @@ POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet
 POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
 POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
 PREHOOK: query: -- select
-explain
+explain vectorization expression
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 PREHOOK: type: QUERY
 POSTHOOK: query: -- select
-explain
+explain vectorization expression
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 POSTHOOK: type: QUERY
@@ -169,10 +169,10 @@ POSTHOOK: Input: default@parquet_types
 119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	68692CCAC0BDE7	12.83
 120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	B4F3CAFDBEDD	73.04
 121	1	2	1.1	6.3	lmn	2032-10-10 22:22:22.222222222	bcdef	abcde		90.33
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -215,7 +215,7 @@ uvwzy 5 abcdede 7 4.76 1
 vwxyz	5	abcdede	7	12.83	1
 wxyza	5	abcde	5	73.04	1
 bcdef	5	abcde	5	90.33	1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
@@ -227,7 +227,7 @@ FROM parquet_types
 GROUP BY ctinyint
 ORDER BY ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
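The Parquet hunks above record two distinct fallbacks: the map side stays in row mode (the explain prints enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false for MapredParquetInputFormat), and the reduce side is rejected because avg's intermediate struct buffer type is unsupported. A hedged sketch of how one might probe this from the CLI, using only settings and names that appear in the conditions printed above (defaults vary by release):

    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.reduce.enabled=true;
    explain vectorization select ctinyint, max(cint) from alltypes_parquet group by ctinyint;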
diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 996b893..423fdbf 100644
--- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem
 POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ]
 PREHOOK: query: --1. test1
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -132,7 +132,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: --1. test1
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -142,6 +142,10 @@ from noop(on part_orc
 order by p_name
 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,6 +176,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -224,6 +236,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -255,6 +272,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -377,7 +399,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 2. testJoinWithNoop
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
@@ -386,13 +408,17 @@ sort by j.p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2. testJoinWithNoop
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
 distribute by j.p_mfgr
 sort by j.p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -428,6 +454,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -497,6 +531,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -571,6 +613,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -602,6 +649,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -707,7 +759,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 44
 Manufacturer#5	almond azure blanched chiffon midnight	23	-23
 PREHOOK: query: -- 3. testOnlyPTF
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
@@ -715,12 +767,16 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 3. testOnlyPTF
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -750,6 +806,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -802,6 +866,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -896,7 +965,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46
 Manufacturer#5	almond azure blanched chiffon midnight	23
 PREHOOK: query: -- 4. testPTFAlias
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -908,7 +977,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 4. testPTFAlias
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -918,6 +987,10 @@ from noop(on part_orc
 order by p_name
 ) abc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -948,6 +1021,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1000,6 +1081,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -1031,6 +1117,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -1153,7 +1244,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1165,7 +1256,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1175,6 +1266,10 @@ from noop(on part_orc
 order by p_name
 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1205,6 +1300,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1257,6 +1360,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1288,6 +1396,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1411,7 +1524,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	23	-23
 PREHOOK: query: -- 6. testSWQAndPTFAndGBy
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1424,7 +1537,7 @@ group by p_mfgr, p_name, p_size
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 6. testSWQAndPTFAndGBy
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1435,6 +1548,10 @@ from noop(on part_orc
 )
 group by p_mfgr, p_name, p_size
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1465,6 +1582,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1517,6 +1642,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1556,6 +1686,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
@@ -1681,7 +1816,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	23	-23
 PREHOOK: query: -- 7. testJoin
-explain extended
+explain vectorization extended
 select abc.* 
 from noop(on part_orc 
 partition by p_mfgr 
@@ -1690,13 +1825,17 @@ order by p_name 
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 7. testJoin
-explain extended
+explain vectorization extended
 select abc.* 
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name 
 ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1727,6 +1866,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1796,6 +1943,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1848,6 +2003,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
@@ -1968,7 +2128,7 @@ POSTHOOK: Input: default@part_orc
 90681	almond antique chartreuse khaki white	Manufacturer#3	Brand#31	MEDIUM BURNISHED TIN	17	SM CASE	1671.68	are slyly after the sl
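Throughout the vectorized_ptf hunks, every reducer that hosts a PTF reports notVectorizedReason: PTF Operator (PTF) not supported while the ORC scan side stays vectorized, so only the map vertices run in batch mode. A hedged sketch of how one might confirm which vertices fall back for a windowing query (query shape borrowed from the tests above):

    explain vectorization extended
    select p_mfgr, p_name, p_size,
    rank() over (partition by p_mfgr order by p_name) as r
    from noop(on part_orc partition by p_mfgr order by p_name);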
 PREHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.* 
 from part_orc p1 join noop(on part_orc 
 partition by p_mfgr 
@@ -1977,13 +2137,17 @@ order by p_name 
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.* 
 from part_orc p1 join noop(on part_orc 
 partition by p_mfgr 
 order by p_name 
 ) abc on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2017,6 +2181,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -2083,6 +2255,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -2174,6 +2354,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
@@ -2259,7 +2444,7 @@ POSTHOOK: Input: default@part_orc
 90681	almond antique chartreuse khaki white	Manufacturer#3	Brand#31	MEDIUM BURNISHED TIN	17	SM CASE	1671.68	are slyly after the sl
 PREHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r
 from noopwithmap(on part_orc
@@ -2268,13 +2453,17 @@ order by p_name, p_size desc)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r
 from noopwithmap(on part_orc
 partition by p_mfgr
 order by p_name, p_size desc)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2320,6 +2509,12 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -2372,6 +2567,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -2403,6 +2603,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -2506,7 +2711,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4
 Manufacturer#5	almond azure blanched chiffon midnight	23	5
 PREHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2517,7 +2722,7 @@ from noopwithmap(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2526,6 +2731,10 @@ from noopwithmap(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2572,6 +2781,12 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -2624,6 +2839,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2656,6 +2876,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2776,7 +3001,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2787,7 +3012,7 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2796,6 +3021,10 @@ from noop(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2826,6 +3055,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -2878,6 +3115,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2909,6 +3151,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3029,7 +3276,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -3041,7 +3288,7 @@ order by p_mfgr, p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -3051,6 +3298,10 @@ partition by p_mfgr
 order by p_mfgr, p_name
 )))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3082,6 +3333,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -3134,6 +3393,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3188,6 +3452,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3227,6 +3496,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3349,7 +3623,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, 
 sub1.cd, sub1.s1 
 from (select p_mfgr, p_name, 
@@ -3364,7 +3638,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, 
 sub1.cd, sub1.s1 
 from (select p_mfgr, p_name, 
@@ -3377,6 +3651,10 @@ order by p_name) 
 window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) 
 ) sub1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3407,6 +3685,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -3459,6 +3745,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3490,6 +3781,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3610,7 +3906,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001
 Manufacturer#5	almond azure blanched chiffon midnight	5	4271.3099999999995
 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name, 
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, 
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, 
@@ -3624,7 +3920,7 @@ order by p_name 
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name, 
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, 
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, 
@@ -3636,6 +3932,10 @@ partition by p_mfgr 
 order by p_name 
 ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3667,6 +3967,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -3736,6 +4044,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -3788,6 +4104,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3845,6 +4166,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3986,7 +4312,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46
 Manufacturer#5	almond azure blanched chiffon midnight	5	5	5	1464.48	7672.66	23	-23
 PREHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size 
 from noop(on part_orc 
 partition by p_mfgr 
@@ -3994,12 +4320,16 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size 
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4030,6 +4360,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -4082,6 +4420,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4121,6 +4464,13 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: vectorized, llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
@@ -4215,7 +4565,7 @@ POSTHOOK: type: CREATEVIEW
 POSTHOOK: Input: default@part_orc
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@mfgr_price_view
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s, 
 sum(s) over w1  as s1
 from noop(on mfgr_price_view 
@@ -4223,7 +4573,7 @@ partition by p_mfgr 
 order by p_mfgr)  
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s, 
 sum(s) over w1  as s1
 from noop(on mfgr_price_view 
@@ -4231,6 +4581,10 @@ partition by p_mfgr 
 order by p_mfgr)  
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4271,6 +4625,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -4323,6 +4685,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -4356,6 +4723,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
@@ -4496,7 +4868,7 @@ fv1 INT)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@part_5
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name) 
@@ -4512,7 +4884,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, 
 first_value(p_size, true) over w1  as fv1
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name) 
@@ -4528,6 +4900,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, 
 first_value(p_size, true) over w1  as fv1
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
@@ -4564,6 +4940,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -4616,6 +5000,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -4656,6 +5045,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -4733,6 +5127,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
@@ -4774,6 +5173,11 @@ STAGE PLANS:
        Reducer 5 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -5041,7 +5445,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6
 Manufacturer#5	almond azure blanched chiffon midnight	23	23	5	5	1.0	2
 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,  
 rank() over (partition by p_mfgr,p_name) as r, 
 dense_rank() over (partition by p_mfgr,p_name) as dr, 
@@ -5060,7 +5464,7 @@ from noop(on 
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,  
 rank() over (partition by p_mfgr,p_name) as r, 
 dense_rank() over (partition by p_mfgr,p_name) as dr, 
@@ -5077,6 +5481,10 @@ from noop(on 
 partition by p_mfgr,p_name 
 order by p_mfgr,p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5108,6 +5516,14 @@ STAGE PLANS:
                       auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -5160,6 +5576,11 @@ STAGE PLANS:
        Reducer 2 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -5221,6 +5642,11 @@ STAGE PLANS:
        Reducer 3 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -5260,6 +5686,11 @@ STAGE PLANS:
        Reducer 4 
            Execution mode: llap
            Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF Operator (PTF) not supported
+                vectorized: false
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -5396,7 +5827,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46
blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5415,7 +5846,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5432,6 +5863,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5464,6 +5899,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5516,6 +5959,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5554,6 +6002,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5585,6 +6038,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5616,6 +6074,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5752,7 +6215,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5769,7 +6232,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5784,6 +6247,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5815,6 +6282,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5867,6 +6342,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5905,6 +6385,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5943,6 +6428,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6075,7 +6565,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6094,7 +6584,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. 
testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6111,6 +6601,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6143,6 +6637,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6195,6 +6697,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6233,6 +6740,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6280,6 +6792,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6312,6 +6829,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6448,7 +6970,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6466,7 +6988,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6482,6 +7004,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6513,6 +7039,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6565,6 +7099,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6619,6 +7158,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6658,6 +7202,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6792,7 +7341,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6808,7 +7357,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6822,6 +7371,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6853,6 +7406,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6905,6 +7466,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6959,6 +7525,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6991,6 +7562,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index e376ca1..8cdb67d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 @@ -29,39 +33,85 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 
Execution mode: llap Reduce Operator Tree: @@ -88,6 +138,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -101,13 +156,27 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out index bfac939..84fdc16 100644 --- ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. 
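The vectorization conditions echoed through these plans (enabledConditionsMet, nativeConditionsMet, and their *NotMet counterparts) name ordinary session properties. A minimal sketch of the knobs involved, using only property names that appear verbatim in the plans above — the actual test setup lives in the corresponding .q files and may set more than this:

set hive.vectorized.execution.enabled=true;                 -- gates PLAN VECTORIZATION: enabled
set hive.vectorized.execution.reduce.enabled=true;          -- first condition checked under Reduce Vectorization
set hive.vectorized.execution.reducesink.new.enabled=true;  -- required for the native VectorReduceSink*Operator classes
set hive.vectorized.use.vectorized.input.format=true;       -- condition reported under Map Vectorization for the ORC input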
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,22 +46,54 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1816546 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) + Statistics: Num rows: 1024 Data size: 151470 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - ListSink + ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index ceaac4f..636463b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ 
-19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00	9999-12-31 23:59:59.999999999	3652060 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
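The rewritten queries in these golden files exercise the new statement at several detail levels. A sketch of the forms seen above, run against the same test table; the behavior of each keyword is inferred from this output and may differ across releases:

explain vectorization
select ts from test;                                  -- summary: per-vertex Map/Reduce Vectorization blocks only

explain vectorization expression
select min(ts), max(ts), max(ts) - min(ts) from test; -- adds operator-level detail such as classNames and selectExpressions

explain vectorization extended
select ts from test;                                  -- additionally keeps Path -> Alias / Path -> Partition detail, as with plain EXPLAIN EXTENDED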
diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 4092911..ae59b06 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -116,26 +120,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -218,7 +257,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -231,7 +270,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -244,6 +283,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -261,26 +304,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, 
VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -363,7 +441,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -376,7 +454,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -389,6 +467,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -406,26 
+488,61 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: 
boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -511,7 +628,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -525,7 +642,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. 
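The string-side timestamp UDFs above parse fixed character positions: the fieldStart/fieldLength arguments in the selectExpressions (fieldStart 0, fieldLength 4 for the year, 5/2 for the month, 8/2 for the day, and so on) line up with a yyyy-MM-dd HH:mm:ss layout. A string in any other layout should therefore surface as NULL rather than as a wrong value, which is what the alltypesorc_wrong queries check. An illustrative pair (expected results assumed, not copied from this run):

select year('2016-10-20 05:06:07');  -- characters 0..3 parse as 2016
select year('20/10/2016');           -- layout mismatch, expected to return NULL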
-EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -538,6 +655,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -555,26 +676,61 @@ STAGE PLANS: TableScan alias: alltypesorc_wrong Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -620,20 +776,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -651,31 +811,73 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -708,15 +910,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -749,20 +955,47 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -788,7 +1021,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -799,7 +1032,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -810,6 +1043,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -827,12 +1064,26 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + Group By Vectorization: + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE @@ -842,8 +1093,21 @@ 
STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out index 1e74446..2a142a0 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -16,7 +16,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -34,22 +34,69 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean + predicate: ((cbigint % 250) = 0) (type: boolean) + Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 26, 8, 27, 29] + selectExpressions: CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 27:Timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 28:string) -> 29:Timestamp + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((cbigint % 250) = 0) (type: boolean) - Select Operator - expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - ListSink + ListSink PREHOOK: query: select -- to timestamp @@ -115,7 +162,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -133,7 +180,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: 
query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -151,22 +198,69 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean + predicate: ((cbigint % 250) = 0) (type: boolean) + Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 20, 8, 21, 23] + selectExpressions: CastLongToTimestamp(col 0) -> 13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 19:timestamp, CastLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:Timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:Timestamp + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: 
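
These two otherwise identical EXPLAINs differ only in how an integer is interpreted when cast to TIMESTAMP: the first plan uses CastMillisecondsLongToTimestamp (the long is epoch milliseconds), the second CastLongToTimestamp (epoch seconds), presumably toggled by a session setting earlier in the .q file. A sketch of the two conversions, with invented class names around the real java.sql.Timestamp API:

    import java.sql.Timestamp;

    public final class LongToTimestampSketch {
      private LongToTimestampSketch() {}

      // CastMillisecondsLongToTimestamp: the long is epoch milliseconds.
      public static Timestamp fromMillis(long v) {
        return new Timestamp(v);           // -7196L -> 23:59:52.804 UTC on 1969-12-31
      }

      // CastLongToTimestamp: the long is epoch seconds.
      public static Timestamp fromSeconds(long v) {
        return new Timestamp(v * 1000L);   // the same bits land 1000x further from epoch
      }
    }

The result rows above (for example 1969-12-31 15:59:52.804) are the millisecond interpretation rendered in the US/Pacific zone these tests run in.
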
alltypesorc - Filter Operator - predicate: ((cbigint % 250) = 0) (type: boolean) - Select Operator - expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - ListSink + ListSink PREHOOK: query: select -- to timestamp diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index fbb43c4..7d722d0 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,27 +36,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
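
Note how the DATE literals have disappeared from the vectorized filter: FilterLongColumnInList(col 3, values [-67, -171]) operates on cdate's internal representation, days since the epoch. The constants are exactly what java.time produces:

    import java.time.LocalDate;

    // Verifies the IN-list constants in the predicateExpression above.
    public class EpochDayCheck {
      public static void main(String[] args) {
        System.out.println(LocalDate.parse("1969-10-26").toEpochDay()); // -67
        System.out.println(LocalDate.parse("1969-07-14").toEpochDay()); // -171
        System.out.println(LocalDate.parse("1970-01-21").toEpochDay()); // 20, used by the NOT IN query below
      }
    }
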
hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -65,10 +107,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,31 +131,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + 
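
The NOT IN predicate compiles to two chained expressions: LongColumnInList writes a boolean into scratch column 4, and SelectColumnIsFalse keeps only the rows where that boolean is FALSE (not NULL, matching SQL three-valued logic). A simplified shape of that pair, with invented names and ignoring the null, isRepeating and batch-reuse handling the real classes do:

    import java.util.HashSet;
    import java.util.Set;

    public final class NotInSketch {
      /** Returns the new batch size after keeping only rows NOT in the list. */
      public static int filterNotIn(long[] col, int n, int[] selected, long[] inList) {
        Set<Long> values = new HashSet<>();
        for (long v : inList) {
          values.add(v);
        }
        int newSize = 0;
        for (int i = 0; i < n; i++) {
          boolean in = values.contains(col[i]);  // LongColumnInList -> 4:boolean
          if (!in) {
            selected[newSize++] = i;             // SelectColumnIsFalse keeps FALSE rows
          }
        }
        return newSize;
      }
    }
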
native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -122,10 +214,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,27 +238,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 
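
The decimal variant swaps in FilterDecimalColumnInList. One detail worth calling out: decimal membership has to be a numeric comparison, because BigDecimal.equals() is scale-sensitive (881.0135135135 and 881.01351351350 would compare unequal). A sketch of the safe test, under invented names; Hive's own HiveDecimal normalizes scale for the same reason:

    import java.math.BigDecimal;

    public final class DecimalInListSketch {
      public static boolean in(BigDecimal v, BigDecimal[] list) {
        for (BigDecimal candidate : list) {
          if (v.compareTo(candidate) == 0) {  // numeric equality, not equals()
            return true;
          }
        }
        return false;
      }
    }
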
Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -175,10 +309,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -195,31 +333,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN 
(2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -232,10 +416,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -252,27 +440,65 @@ STAGE PLANS: TableScan alias: decimal_date_test 
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -285,10 +511,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -305,27 +535,65 @@ STAGE PLANS: TableScan 
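
As with the IN lists, the BETWEEN bounds are folded to epoch-day longs before the scan runs: FilterLongColumnBetween(col 3, left -2, right 1) above, and FilterLongColumnNotBetween(col 3, left -610, right 608) in the plan that follows. The constants check out:

    import java.time.LocalDate;

    // Epoch-day values for the four DATE bounds in these two queries.
    public class BetweenBoundsCheck {
      public static void main(String[] args) {
        System.out.println(LocalDate.parse("1969-12-30").toEpochDay()); // -2
        System.out.println(LocalDate.parse("1970-01-02").toEpochDay()); // 1
        System.out.println(LocalDate.parse("1968-05-01").toEpochDay()); // -610
        System.out.println(LocalDate.parse("1971-09-01").toEpochDay()); // 608
      }
    }
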
alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -338,10 +606,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -358,27 +630,65 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic 
stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -391,10 +701,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -411,31 +725,77 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
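
FilterDecimalColumnBetween keeps both endpoints, since SQL BETWEEN is inclusive; per row the test is two compareTo calls. A minimal sketch with invented names, null handling omitted:

    import java.math.BigDecimal;

    public final class DecimalBetweenSketch {
      public static boolean between(BigDecimal v, BigDecimal left, BigDecimal right) {
        return v.compareTo(left) >= 0 && v.compareTo(right) <= 0;  // inclusive both ends
      }
    }
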
projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -695,12 +1055,16 @@ POSTHOOK: Input: default@decimal_date_test 6172 PREHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY POSTHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM 
decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -717,12 +1081,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -731,20 +1110,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -757,10 +1166,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT 
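
In this projections section the IN expression is no longer a filter: LongColumnInList writes its boolean result into scratch column 4, which becomes the GROUP BY key, and count(1) counts through ConstantVectorExpression(val 1) -> 5:long. Constant expressions are cheap in the vectorized model because a column can be marked repeating instead of being filled slot by slot. A sketch under invented names (1024 is the real default batch size):

    // Minimal stand-in for a long column of a VectorizedRowBatch.
    final class LongColumnSketch {
      long[] vector = new long[1024];  // VectorizedRowBatch.DEFAULT_SIZE
      boolean isRepeating;
    }

    public final class ConstantColumnSketch {
      // ConstantVectorExpression(val 1): mark the column repeating, write slot 0 once.
      public static void evaluate(LongColumnSketch out) {
        out.isRepeating = true;
        out.vector[0] = 1L;
      }
    }
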
cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -777,12 +1190,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -791,20 +1219,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -817,10 +1275,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -837,12 +1299,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -851,20 +1328,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
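
Contrast this plan with the filter versions above: as a projected value there is no native vectorized BETWEEN here, so the Select falls back to VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) and the Map Vectorization summary flips to usesVectorUDFAdaptor: true. The adaptor pattern is essentially a per-row loop over the batch; the interface and names below are invented, not the real adaptor API:

    public final class AdaptorSketch {
      interface RowUdf {
        long evaluate(long rowValue);  // a row-mode UDF evaluated one value at a time
      }

      public static void evaluate(long[] input, long[] output, int n, RowUdf udf) {
        for (int i = 0; i < n; i++) {
          output[i] = udf.evaluate(input[i]);  // correct, but gives up the tight
        }                                      // type-specialized loop of native ops
      }
    }
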
true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -877,10 +1384,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -897,12 +1408,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -911,20 +1437,50 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 424a2c9..bf6c494 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,12 +132,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + className: VectorGroupByOperator + 
vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -146,7 +165,20 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -162,16 +194,33 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_char_4.q.out ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 3e551bb..943a4b1 100644 --- ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: 
explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,12 +146,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 73272fb..4322dc7 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] PREHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization expression select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY POSTHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization expression select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -1256,11 +1260,24 @@ STAGE PLANS: TableScan alias: web_sales Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [16] Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 16 + native: false + projectedOutputColumns: [] keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 @@ -1269,35 +1286,88 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index dbaf14d..79638c1 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -189,10 +193,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -209,29 +217,66 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: 
double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index cfdfce1..919e290 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem PREHOOK: query: -- SORT_QUERY_RESULTS -- First only do 
simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -33,13 +33,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,12 +60,26 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -70,27 +88,65 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
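The plans in these diffs follow one pattern worth calling out: map-side aggregators (VectorUDAFCount, VectorUDAFCountStar, VectorUDAFSumDecimal) emit primitive partial results that a reduce-side merge aggregator (VectorUDAFCountMerge) folds, staying vectorized end to end, while AVG and STDDEV produce a struct partial, which is why the output reports "output type STRUCT requires PRIMITIVE IS false" and a notVectorizedReason on the reducer. What follows is a minimal, self-contained Java sketch of that partial/merge split; every name in it is invented for illustration and none of it is Hive's actual VectorAggregateExpression code.

public final class PartialAggSketch {

  // Map-side COUNT over one column of a row batch: count the non-null rows,
  // honoring the selection vector when one is in effect.
  static long countBatch(boolean[] isNull, int[] selected, int size, boolean selectedInUse) {
    long count = 0;
    for (int j = 0; j < size; j++) {
      int row = selectedInUse ? selected[j] : j;
      if (!isNull[row]) {
        count++;
      }
    }
    return count;
  }

  // Reduce-side COUNT merge: the partial is a single primitive (a bigint),
  // so the merge itself remains a simple vectorizable fold.
  static long mergeCounts(long[] partials) {
    long total = 0;
    for (long p : partials) {
      total += p;
    }
    return total;
  }

  // AVG cannot do the same: its partial must carry both a count and a sum,
  // i.e. a struct, which is the condition the explain output flags.
  static final class AvgPartial {
    long count;
    double sum;

    void merge(AvgPartial other) {
      count += other.count;
      sum += other.sum;
    }

    double finish() {
      return count == 0 ? Double.NaN : sum / count;
    }
  }

  public static void main(String[] args) {
    boolean[] isNull = {false, true, false, false};
    int[] selected = {0, 2};                        // rows surviving an upstream filter
    long partial = countBatch(isNull, selected, selected.length, true);
    System.out.println(mergeCounts(new long[]{partial, 5L}));  // prints 7

    AvgPartial a = new AvgPartial();
    a.count = 2; a.sum = 10.0;
    AvgPartial b = new AvgPartial();
    b.count = 3; b.sum = 50.0;
    a.merge(b);
    System.out.println(a.finish());                 // prints 12.0
  }
}

This matches what the diffs show: the COUNT/MIN/MAX/SUM plans keep vectorOutput: true through the reducer, while the AVG and STDDEV variants report vectorOutput: false on the map side and a non-vectorized reduce stage.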
Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 9, val 1) -> boolean predicate: (_col9 > 1) (type: boolean) Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -130,7 +186,7 @@ POSTHOOK: Input: default@decimal_vgby 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 PREHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby @@ -138,13 +194,17 @@ EXPLAIN SELECT cint, HAVING COUNT(*) > 1 PREHOOK: type: QUERY POSTHOOK: query: -- Now add the others... 
-EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -161,12 +221,27 @@ STAGE PLANS: TableScan alias: decimal_vgby Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 3 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -178,7 +253,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: 
decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index 0493994..11d7609 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -93,18 +97,40 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -117,12 +143,23 @@ 
STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -130,18 +167,35 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 803a53b..af9ec87 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,11 +131,24 @@ STAGE 
PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -140,12 +157,38 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -153,9 +196,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_elt.q.out ql/src/test/results/clientpositive/spark/vector_elt.q.out index bb66867..b49462a 100644 --- ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -1,29 +1,79 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM 
alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean + predicate: (ctinyint > 0) (type: boolean) + Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string + Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -47,7 +97,7 @@ POSTHOOK: Input: 
default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +110,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,22 +123,67 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), 
'123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Limit - Number of rows: 1 - ListSink + ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index e13c311..a99dfd8 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,12 +131,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -141,14 +159,41 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -156,9 +201,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 511bd79..ef19bad 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -53,18 +57,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -77,12 +108,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -90,6 +132,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col1 input vertices: 0 Map 1 @@ -97,15 +147,35 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -126,12 +196,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 
depends on stages: Stage-2 @@ -147,23 +221,56 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -176,12 +283,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -189,18 +307,42 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + bigTableValueColumns: [0] + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -257,12 +399,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -278,18 +424,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -302,12 +475,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 
Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -315,6 +499,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -322,15 +514,36 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -351,12 +564,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -372,18 +589,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: 
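The rowBatchContext blocks above describe how each map task lays out its batches: dataColumnCount and dataColumns come from the table schema, while scratchColumnTypeNames (here a single String) are extra vectors appended after the data columns for intermediate results; smallTableMapping: [2] says the joined small-table value lands in scratch column 2. A deliberately simplified illustration of that layout, not Hive's actual classes:

    // Sketch only: a batch sized for data columns plus scratch columns.
    final class BatchLayoutSketch {
        final Object[][] columns; // one vector per column: data first, then scratch

        BatchLayoutSketch(int dataColumnCount, int scratchColumnCount, int rows) {
            columns = new Object[dataColumnCount + scratchColumnCount][rows];
        }

        /** The map join drops each row's small-table match into a scratch column. */
        void fillScratch(int scratchCol, Object[] matchedValues, int size) {
            System.arraycopy(matchedValues, 0, columns[scratchCol], 0, size);
        }
    }

With columns 0-1 holding c:int and v2:string read from ORC, the join fills column 2 with the matched v1 strings, and the following Select can project [2, 0] without copying any data column.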
VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -396,12 +640,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -409,18 +664,44 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -441,12 +722,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b 
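projectedOutputColumns: [0, 1, 1, 2] in the plan above is worth a second look: physical column 1 appears twice. Because the inner join matched on t1.a = t2.c, both logical outputs can alias the same key vector rather than copying it; projection in the vectorized path is just index indirection, roughly as in this sketch:

    // Sketch only: projection by reference, no per-row copying.
    // Each element of physicalColumns is itself a column vector.
    final class ProjectionSketch {
        static Object[] project(Object[] physicalColumns, int[] projection) {
            Object[] logical = new Object[projection.length];
            for (int k = 0; k < projection.length; k++) {
                logical[k] = physicalColumns[projection[k]]; // alias the vector
            }
            return logical;
        }
    }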
t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -462,18 +747,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -486,12 +798,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -499,6 +822,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -506,15 +838,37 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 1] + selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 
3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String, bigint, bigint Local Work: Map Reduce Local Work @@ -535,12 +889,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -556,18 +914,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -580,12 +965,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean 
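The selectExpressions above show how t1.a*2 and t2.c*5 are evaluated: LongColMultiplyLongScalar reads the key column and writes each product into a scratch column (3 and 4, the two bigint entries in scratchColumnTypeNames). The generated code boils down to a loop like this sketch (simplified names, null handling omitted):

    // Sketch only: long column * long scalar into a scratch output vector.
    final class MultiplySketch {
        static void multiplyScalar(long[] in, long[] out, int[] selected,
                                   int size, boolean selectedInUse, long scalar) {
            if (selectedInUse) {
                for (int j = 0; j < size; j++) {
                    int i = selected[j];
                    out[i] = in[i] * scalar;
                }
            } else {
                for (int i = 0; i < size; i++) {
                    out[i] = in[i] * scalar;
                }
            }
        }
    }

Writing into scratch vectors is what lets the Select stay native: the arithmetic never leaves primitive arrays.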
predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -593,6 +989,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -600,15 +1005,36 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -629,12 +1055,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -650,18 +1080,45 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 
91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -674,12 +1131,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -687,6 +1155,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [1, 2, 0] + smallTableMapping: [2] outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -694,15 +1171,36 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -723,12 +1221,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -744,18 +1246,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -768,12 +1297,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -781,6 +1321,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key 
Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -788,15 +1337,36 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -817,12 +1387,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -838,18 +1412,45 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: 
c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -862,12 +1463,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -875,6 +1487,15 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableRetainedColumns: [0, 1] + bigTableValueColumns: [0, 1] + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 @@ -882,15 +1503,36 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index c08fbda..91af229 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from 
alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -40,6 +44,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -56,6 +68,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -103,10 +123,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 012c3eb..f4a1e6e 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -55,6 +59,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + 
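Here, in contrast to the ORC cases above, Map Vectorization reports enabled: false: lineitem is a text table, and with hive.vectorized.use.vector.serde.deserialize off there is no vectorized read path for TextInputFormat, so only the reducers vectorize. The met/not-met lists in the output suggest simple per-condition bookkeeping; a toy illustration of that idea (not the Vectorizer's actual logic) could look like:

    import java.util.ArrayList;
    import java.util.List;

    // Toy sketch: record why a map task did or did not vectorize.
    final class VectorizationGateSketch {
        static boolean check(boolean vectorizedInputFormat,
                             boolean vectorSerdeDeserialize,
                             List<String> met, List<String> notMet) {
            if (vectorizedInputFormat) {
                met.add("hive.vectorized.use.vectorized.input.format IS true");
            } else if (vectorSerdeDeserialize) {
                met.add("hive.vectorized.use.vector.serde.deserialize IS true");
            } else {
                notMet.add("hive.vectorized.use.vector.serde.deserialize IS false");
            }
            return notMet.isEmpty();
        }

        public static void main(String[] args) {
            List<String> met = new ArrayList<>(), notMet = new ArrayList<>();
            boolean enabled = check(false, false, met, notMet);
            System.out.println("enabled: " + enabled + ", notMet: " + notMet);
        }
    }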
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 3 @@ -75,17 +83,37 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -137,6 +165,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -171,19 +203,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -217,6 +253,10 @@ STAGE PLANS: keys: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 3 @@ -237,17 +277,37 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: 
vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -299,6 +359,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index 4710a73..cbf7d03 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,12 +128,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -138,14 +156,41 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: 
boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -153,17 +198,36 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index b311c49..d1319b8 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain 
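This plan shows vectorized aggregation on both sides of the shuffle: the map side runs max(b) in hash mode keyed by bo (VectorUDAFMaxLong(col 3), keyExpressions: col 7), and the reducer merges partials with VectorUDAFMaxLong(col 1) in mergepartial mode. Stripped of batching and null handling, the hash-mode computation is just a running max per key; a minimal sketch, assuming boxed keys where Hive uses specialized key wrappers:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch only: hash-mode GROUP BY max, fed one batch at a time.
    final class GroupByMaxSketch {
        final Map<Boolean, Long> maxPerKey = new HashMap<>();

        void processBatch(boolean[] keyCol, long[] valCol, int size) {
            for (int i = 0; i < size; i++) {
                maxPerKey.merge(keyCol[i], valCol[i], Math::max);
            }
        }
    }

The mergepartial stage in the reducer performs the same merge over the per-map partial maxes, which is why max can be split across stages at all.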
vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -83,15 +87,38 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -104,9 +131,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -114,18 +148,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [1] + bigTableOuterKeyMapping: 1 -> 2 + bigTableRetainedColumns: [0, 1, 2] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [0, 1, 2, 3] + smallTableMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
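bigTableOuterKeyMapping: 1 -> 2 above is specific to the outer join: as I read it, the operator keeps a separate copy of the big-table key in output column 2 (the t2.c position) so non-matching rows can flow through with the small-table side left NULL, without disturbing the input vector. Reduced to a single row, the LEFT OUTER contract is just this (plain HashMap standing in for the vectorized hash table):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;

    // Sketch only: LEFT OUTER join semantics for one big-table row,
    // producing (v1, a, c, v2) as in the plan above.
    final class OuterJoinSketch {
        static Object[] joinRow(long key, String bigValue,
                                Map<Long, String> smallTable) {
            String smallValue = smallTable.get(key); // null when no match
            Long keyOut = (smallValue == null) ? null : key; // mapped key col
            return new Object[] {bigValue, key, keyOut, smallValue};
        }

        public static void main(String[] args) {
            Map<Long, String> small = new HashMap<>();
            small.put(3L, "THREE");
            System.out.println(Arrays.toString(joinRow(3L, "three", small)));
            System.out.println(Arrays.toString(joinRow(1L, "one", small)));
            // [three, 3, 3, THREE]
            // [one, 1, null, null]  <- row survives, small side NULL
        }
    }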
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, String Local Work: Map Reduce Local Work @@ -155,12 +216,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -176,15 +241,38 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -197,9 +285,16 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -207,18 +302,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: [0] + bigTableOuterKeyMapping: 0 -> 3 + bigTableRetainedColumns: [0, 1, 3] + bigTableValueColumns: [0, 1] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + projectedOutputColumns: [2, 3, 0, 1] + smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
index 6b89fb3..3a7e27f 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
@@ -216,18 +216,22 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -243,15 +247,38 @@ STAGE PLANS:
             TableScan
               alias: cd
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col2 (type: int)
                     1 _col2 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -264,9 +291,16 @@ STAGE PLANS:
             TableScan
               alias: c
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -274,18 +308,45 @@
                   keys:
                     0 _col2 (type: int)
                     1 _col2 (type: int)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [2]
+                      bigTableOuterKeyMapping: 2 -> 14
+                      bigTableRetainedColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14]
+                      bigTableValueColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+                      smallTableMapping: [12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23]
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
                   input vertices:
                     1 Map 2
                   Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: bigint, bigint, bigint, bigint, Double, Double, String, String, Timestamp, Timestamp, bigint, bigint
       Local Work:
         Map Reduce Local Work
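The bigTableOuterKeyMapping (2 -> 14) and smallTableMapping entries above show how the outer-join operator assembles its output batch: big-table columns pass through, the join key is mirrored into a mapped output slot, and small-table columns are NULL-filled on non-matching rows. A standalone sketch of that behavior under those assumptions (hypothetical names, not the Hive implementation):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class OuterJoinColumnsSketch {
  public static void main(String[] args) {
    Integer[] bigTableKey = {1, 2, null, 3};                  // big-table key column
    Set<Integer> smallTableKeys = new HashSet<>(Arrays.asList(2, 3));
    Integer[] mappedKeyOut = new Integer[4];                  // outer key mapping target slot
    Integer[] smallTableValue = new Integer[4];               // small-table column in output
    for (int i = 0; i < bigTableKey.length; i++) {
      mappedKeyOut[i] = bigTableKey[i];                       // key copied through the mapping
      boolean match = bigTableKey[i] != null && smallTableKeys.contains(bigTableKey[i]);
      smallTableValue[i] = match ? bigTableKey[i] * 100 : null; // NULL-fill when unmatched
      System.out.println(bigTableKey[i] + " -> key " + mappedKeyOut[i]
          + ", small-table value " + smallTableValue[i]);
    }
  }
}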
@@ -332,18 +393,22 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -359,15 +424,38 @@ STAGE PLANS:
             TableScan
               alias: hd
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col0 (type: tinyint)
                     1 _col0 (type: tinyint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -380,9 +468,16 @@ STAGE PLANS:
             TableScan
               alias: c
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -390,18 +485,42 @@
                   keys:
                     0 _col0 (type: tinyint)
                     1 _col0 (type: tinyint)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [0]
+                      bigTableRetainedColumns: [0]
+                      bigTableValueColumns: [0]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [0]
                   outputColumnNames: _col0
                   input vertices:
                     1 Map 2
                   Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -534,7 +653,7 @@
 NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -543,7 +662,7 @@
 left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -552,6 +671,10 @@
 left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -567,15 +690,38 @@ STAGE PLANS:
             TableScan
               alias: cd
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: cint (type: int)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [2]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
 Map 4
@@ -583,15 +729,38 @@ STAGE PLANS:
             TableScan
               alias: hd
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col0 (type: tinyint)
                     1 _col0 (type: tinyint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -606,9 +775,16 @@ STAGE PLANS:
             TableScan
               alias: c
               Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: ctinyint (type: tinyint), cint (type: int)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 2]
                 Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -616,6 +792,14 @@
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [2]
+                      bigTableRetainedColumns: [0]
+                      bigTableValueColumns: [0]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [0]
                   outputColumnNames: _col0
                   input vertices:
                     1 Map 3
@@ -626,32 +810,84 @@
                   keys:
                     0 _col0 (type: tinyint)
                     1 _col0 (type: tinyint)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [0]
+                      bigTableRetainedColumns: [0]
+                      bigTableValueColumns: [0]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [0]
                   outputColumnNames: _col0
                   input vertices:
                     1 Map 4
                   Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count(), sum(_col0)
+                    Group By Vectorization:
+                        aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint
+                        className: VectorGroupByOperator
+                        vectorOutput: true
+                        native: false
+                        projectedOutputColumns: [0, 1]
                     mode: hash
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order:
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 2]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
 Reducer 2
       Execution mode: vectorized
+      Reduce Vectorization:
+          enabled: true
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+          groupByVectorOutput: true
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+              partitionColumnCount: 0
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), sum(VALUE._col1)
+          Group By Vectorization:
+              aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
+              className: VectorGroupByOperator
+              vectorOutput: true
+              native: false
+              projectedOutputColumns: [0, 1]
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
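The VectorUDAFSumLong aggregator named above sums a whole column vector per batch instead of evaluating one row at a time, with a branch-free fast path when the column has no NULLs. A standalone sketch of that idea (hypothetical names, not the actual Hive class):

public class VectorSumSketch {
  // Mirrors the semantics of a vectorized long-sum aggregator over one batch.
  static long sumLongColumn(long[] vector, boolean[] isNull, boolean noNulls, int n) {
    long sum = 0;
    if (noNulls) {
      for (int i = 0; i < n; i++) sum += vector[i];      // tight loop, no null checks
    } else {
      for (int i = 0; i < n; i++) if (!isNull[i]) sum += vector[i];
    }
    return sum;
  }
  public static void main(String[] args) {
    long[] v = {5, 7, 11, 13};
    boolean[] nulls = {false, true, false, false};
    System.out.println(sumLongColumn(v, nulls, false, v.length)); // prints 29
  }
}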
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
index 113c7d0..453db4b 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
@@ -226,7 +226,7 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@
 left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -244,6 +244,10 @@
 left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -259,15 +263,38 @@ STAGE PLANS:
             TableScan
               alias: cd
               Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: cint (type: int)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2]
                 Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [2]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
 Map 4
@@ -275,15 +302,38 @@ STAGE PLANS:
             TableScan
               alias: hd
               Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: cbigint (type: bigint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [3]
                 Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
                 Spark HashTable Sink Operator
+                  Spark Hash Table Sink Vectorization:
+                      className: VectorSparkHashTableSinkOperator
+                      native: true
                   keys:
                     0 _col1 (type: bigint)
                     1 _col0 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: true
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [3]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -298,9 +348,16 @@ STAGE PLANS:
             TableScan
               alias: c
               Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+              TableScan Vectorization:
+                  native: true
+                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
               Select Operator
                 expressions: cint (type: int), cbigint (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 3]
                 Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -308,6 +365,14 @@
                   keys:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [2]
+                      bigTableRetainedColumns: [3]
+                      bigTableValueColumns: [3]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [3]
                   outputColumnNames: _col1
                   input vertices:
                     1 Map 3
@@ -318,32 +383,84 @@
                   keys:
                     0 _col1 (type: bigint)
                     1 _col0 (type: bigint)
+                  Map Join Vectorization:
+                      bigTableKeyColumns: [3]
+                      bigTableRetainedColumns: [3]
+                      bigTableValueColumns: [3]
+                      className: VectorMapJoinOuterLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      projectedOutputColumns: [3]
                   outputColumnNames: _col1
                   input vertices:
                     1 Map 4
                   Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count(), sum(_col1)
+                    Group By Vectorization:
+                        aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint
+                        className: VectorGroupByOperator
+                        vectorOutput: true
+                        native: false
+                        projectedOutputColumns: [0, 1]
                     mode: hash
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order:
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [2, 3]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
 Reducer 2
       Execution mode: vectorized
+      Reduce Vectorization:
+          enabled: true
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+          groupByVectorOutput: true
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+              partitionColumnCount: 0
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0), sum(VALUE._col1)
+          Group By Vectorization:
+              aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
+              className: VectorGroupByOperator
+              vectorOutput: true
+              native: false
+              projectedOutputColumns: [0, 1]
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            File Sink Vectorization:
+                className: VectorFileSinkOperator
+                native: false
            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
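The nativeConditionsMet / nativeConditionsNotMet lists above record a checklist: a specialized native operator is used only when every named condition holds, otherwise the plan falls back (as the VectorReduceSinkOperator with native: false does, because "Uniform Hash IS false"). A small sketch of evaluating such a checklist, under the assumption that it is a simple all-must-pass gate (hypothetical names, not Hive code):

import java.util.LinkedHashMap;
import java.util.Map;

public class NativeConditionsSketch {
  public static void main(String[] args) {
    Map<String, Boolean> conditions = new LinkedHashMap<>();
    conditions.put("hive.vectorized.execution.reducesink.new.enabled IS true", true);
    conditions.put("No TopN IS true", true);
    conditions.put("Uniform Hash", false);            // the failed condition in the plan above
    StringBuilder met = new StringBuilder(), notMet = new StringBuilder();
    boolean allMet = true;
    for (Map.Entry<String, Boolean> e : conditions.entrySet()) {
      (e.getValue() ? met : notMet).append(e.getKey()).append("; ");
      allMet &= e.getValue();
    }
    System.out.println("nativeConditionsMet: " + met);
    System.out.println("nativeConditionsNotMet: " + notMet);
    System.out.println(allMet ? "use native operator" : "fall back to non-native operator");
  }
}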
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
index c5a8de5..fbd294e 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
@@ -226,7 +226,7 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -244,117 +244,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cstring1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
@@ -380,7 +270,7 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
 #### A masked pattern was here ####
 20
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -389,7 +279,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -398,117 +288,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring2 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
@@ -534,7 +314,7 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
 #### A masked pattern was here ####
 28
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -543,7 +323,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -552,117 +332,7 @@
 left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cbigint (type: bigint), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: bigint), _col3 (type: string)
-                        1 _col0 (type: bigint), _col1 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cstring1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int), _col2 (type: string)
-                        1 _col0 (type: int), _col1 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: bigint), _col3 (type: string)
-                        1 _col0 (type: bigint), _col1 (type: string)
-                      outputColumnNames: _col0, _col2
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: int), _col2 (type: string)
-                          1 _col0 (type: int), _col1 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
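In this file the old text-form plans are removed because the queries now use "explain vectorization detail formatted", whose output the test harness appears to replace with masked placeholders, as the "#### A masked pattern was here ####" lines suggest. A sketch of that kind of output masking before golden-file comparison, under the assumption that it is a simple regex substitution (hypothetical names, not the Hive test driver):

public class MaskPatternSketch {
  public static void main(String[] args) {
    String planLine = "location hdfs://host:8020/tmp/hive/scratch123/plan.xml";
    // Nondeterministic fragments (paths, hosts, ids) are replaced before diffing
    // against the stored q.out expectations.
    String masked = planLine.replaceAll("hdfs://\\S+", "#### A masked pattern was here ####");
    System.out.println(masked);
  }
}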
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
index 94860ab..b9b97f6 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
@@ -246,85 +246,19 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select *
@@ -397,85 +331,19 @@
 NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select c.ctinyint
@@ -904,7 +772,7 @@
 NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd
@@ -913,7 +781,7 @@
 left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd
@@ -922,117 +790,7 @@
 left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: tinyint)
-                          1 _col0 (type: tinyint)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 36 Data size: 5307 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.ctinyint
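All the outer joins shown so far report className VectorMapJoinOuterLongOperator because tinyint, int, and bigint keys share a long-keyed specialization; other key kinds would presumably select a different variant. A sketch of that dispatch-by-key-kind idea, with names modeled on the class names in these plans but otherwise hypothetical:

public class JoinOperatorChoiceSketch {
  enum KeyKind { LONG_FAMILY, STRING, MULTI_KEY }
  // Integer-family join keys (tinyint/smallint/int/bigint) normalize to one
  // long-keyed operator; the other names here are assumptions for illustration.
  static String chooseOuterJoinOperator(KeyKind kind) {
    switch (kind) {
      case LONG_FAMILY: return "VectorMapJoinOuterLongOperator";
      case STRING:      return "VectorMapJoinOuterStringOperator";
      default:          return "VectorMapJoinOuterMultiKeyOperator";
    }
  }
  public static void main(String[] args) {
    System.out.println(chooseOuterJoinOperator(KeyKind.LONG_FAMILY));
  }
}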
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index bd9b852..eb61044 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,105 +66,21 @@
 POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
 #### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: st
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -184,111 +100,21 @@
 POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
 #### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -308,111 +134,21 @@
 POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6058
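The next plans involve the join condition pmod(s.ctinyint, 4) = s.cmodint, where pmod is Hive's positive-modulo function: unlike Java's % operator, it never returns a negative result for a positive divisor. A standalone sketch of that semantics:

public class PmodSketch {
  // Positive modulo: ((a % b) + b) % b is non-negative whenever b > 0.
  static int pmod(int a, int b) {
    return ((a % b) + b) % b;
  }
  public static void main(String[] args) {
    System.out.println(-3 % 4);       // -3 with Java's remainder operator
    System.out.println(pmod(-3, 4));  //  1 with Hive-style pmod
  }
}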
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -432,111 +168,21 @@
 POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6248
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
 #### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {(_col0 < 100)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col0 < 100)}
-                        1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -556,7 +202,7 @@
 POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm
@@ -565,7 +211,7 @@
 left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm
@@ -574,117 +220,7 @@
 left outer
join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 7329 Data size: 2451 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -770,105 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was 
here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -888,111 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join 
small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1012,111 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on 
stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1136,111 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark #### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1260,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1269,7 +451,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1278,117 +460,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 
0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 5497426..5bc0f6e 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -97,32 +97,77 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION 
EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Select Operator + expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: over1korc - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 20 - ListSink + ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -265,20 +310,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: 
Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -296,11 +345,25 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [19] + selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 19 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -309,13 +372,39 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash 
Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -323,20 +412,42 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 9a46ee1..1c8e479 100644 --- ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true 
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,12 +146,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 22fe7cd..b297a7d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1,7 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -12,7 +12,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
-EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -37,42 +41,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] 
IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -104,16 +166,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,41 +197,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -191,7 +315,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -204,7 +328,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -217,6 +341,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -248,7 +376,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -262,6 +403,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -310,7 +458,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -318,7 +466,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -326,6 +474,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -343,42 +495,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -410,16 +620,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: 
query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -437,41 +651,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true 
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -497,7 +769,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1698460028409 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -510,7 +782,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -523,6 +795,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -554,7 +830,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -568,6 +857,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -616,7 +912,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here 
#### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -624,7 +920,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -632,6 +928,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -649,42 +949,100 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: 
mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -716,16 +1074,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -743,41 +1105,99 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 4) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE 
IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -803,7 +1223,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -816,7 +1236,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -829,6 +1249,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -860,7 +1284,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -874,6 +1311,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -923,7 +1367,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. 
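The notVectorizedReason entries in the plans above all point at the same limitation: AVG, VARIANCE, and the STDDEV family ship their partial aggregation state as a struct (at minimum a count and a sum), and the vectorized GROUP BY path can only hand primitive types to the next stage, so the first reducer falls back to row mode while the map side and the final ORDER BY reducer stay vectorized. A minimal sketch of that partial state, using the hypothetical names PartialAvg, merge, and finish rather than Hive's actual aggregation buffers:

    // Illustrative only -- a hypothetical class, not Hive's implementation.
    // It shows why AVG's intermediate result must be a (count, sum) struct
    // rather than a single primitive value.
    public final class PartialAvg {
        long count;   // non-null rows aggregated so far
        double sum;   // running sum of those rows

        void add(long value) {          // map-side accumulation
            count++;
            sum += value;
        }

        void merge(PartialAvg other) {  // reduce-side merge of two partials
            count += other.count;
            sum += other.sum;
        }

        double finish() {               // final result after all merges
            return count == 0 ? 0.0 : sum / count;
        }
    }

Both fields have to travel together: merging per-partition averages instead of (count, sum) pairs would weight a 10-row partition the same as a 10,000-row one and give a wrong answer, which is why the intermediate type is a struct and why the plans above report "output type STRUCT requires PRIMITIVE IS false" for these aggregators.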
-PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -950,7 +1394,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -977,6 +1421,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -993,15 +1441,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 4, 0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE @@ -1010,7 +1476,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: 
double), _col5 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 6d5c9d9..37b5a7d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -35,7 +35,7 @@ LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,15 +89,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true 
+ projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -105,7 +128,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -124,16 +160,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, 
_col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -255,7 +308,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -289,7 +342,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -322,6 +375,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -339,15 +396,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, 
ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -359,7 +435,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -378,16 +467,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 99192bd..89b68f7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -105,7 +109,20 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) @@ -124,6 +141,13 @@ STAGE PLANS: value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 0a676bd..9a7accc 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -101,7 +105,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) @@ -120,6 +137,13 @@ STAGE PLANS: value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index d7f8ba4..7b7a754 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,7 +82,20 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index 878dcce..763380a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -50,6 +50,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -79,8 +83,23 @@ STAGE PLANS: Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_7.q.out ql/src/test/results/clientpositive/spark/vectorization_7.q.out index 11f0083..3db173a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_7.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -29,7 +29,7 @@ LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -56,6 +56,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,35 +76,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, 
FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + 
enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -190,7 +237,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -218,7 +265,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -245,6 +292,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -261,35 +312,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator 
+ native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 
Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_8.q.out ql/src/test/results/clientpositive/spark/vectorization_8.q.out index 1d4f32b..6887087 100644 --- ql/src/test/results/clientpositive/spark/vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_8.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -27,7 +27,7 @@ LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,35 +72,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter 
Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key 
expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data 
size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -177,7 +224,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -203,7 +250,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -228,6 +275,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,35 +295,78 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), 
((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index d7f8ba4..7b7a754 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 @@ -78,7 +82,20 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out index 9a6cb52..b609ab0 100644 --- ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out @@ -12,28 +12,77 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_decimal_test + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean + predicate: (cint is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate (type: date), cdecimal (type: decimal(20,10)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + 
native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_decimal_test - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vectorization_div0.q.out ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index a70647e..923e65e 100644 --- ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -1,27 +1,72 @@ PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: (cdouble / 0.0) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 
Data size: 3000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 100 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: (cdouble / 0.0) (type: double) - outputColumnNames: _col0 - Limit - Number of rows: 100 - ListSink + ListSink PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY @@ -134,17 +179,21 @@ NULL PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -161,32 +210,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 17] + selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -311,16 +402,20 @@ POSTHOOK: Input: default@alltypesorc 60330397 NULL 0.000000019890470801974 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -337,32 +432,74 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + 
className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 1, 3, 4] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num 
rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out index 23d7ab3..361384f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -76,8 +80,23 @@ STAGE PLANS: Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out index 59a3be0..1f1bb30 100644 --- ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
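
The "loss of precision" warning that opens vectorization_pushdown.q.out fires because the bigint is implicitly cast to double for the comparison, and a double's 53-bit significand cannot represent every 64-bit long: above 2^53, distinct bigint values round to the same double. A self-contained illustration (not part of the patch):

    // Distinct longs can compare equal once both sides are doubles.
    public class BigintDoublePrecision {
        public static void main(String[] args) {
            long a = (1L << 53) + 1;   // 9007199254740993
            long b = 1L << 53;         // 9007199254740992
            System.out.println(a == b);                   // false
            System.out.println((double) a == (double) b); // true: a rounds to 2^53
        }
    }
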
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -37,7 +41,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 2347731..c7c025e 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,6 +136,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -150,15 +156,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 1, 4, 0] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE @@ -167,7 +191,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) @@ -274,7 +311,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -312,7 +350,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -345,6 +384,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -361,15 +404,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) 
outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 5, 0] Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE @@ -378,7 +439,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) @@ -479,7 +553,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -516,7 +591,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN 
        (-(VAR_POP(cbigint))),
        (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))),
        COUNT(*),
@@ -548,6 +624,10 @@ WHERE ((ctimestamp1 = ctimestamp2)
         AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a'))))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -564,15 +644,33 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean
                     predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
                       outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 0, 1, 2, 5]
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                            vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                         Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
@@ -581,7 +679,20 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
@@ -681,7 +792,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double
 -- ArithmeticOps: Add, Divide, Remainder, Multiply
 -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(ctinyint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
        (AVG(ctinyint) + 6981),
        ((AVG(ctinyint) + 6981) + AVG(ctinyint)),
        MAX(cbigint),
@@ -708,7 +820,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double
 -- ArithmeticOps: Add, Divide, Remainder, Multiply
 -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(ctinyint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
        (AVG(ctinyint) + 6981),
        ((AVG(ctinyint) + 6981) + AVG(ctinyint)),
        MAX(cbigint),
@@ -730,6 +843,10 @@ WHERE (((ctimestamp2 <= ctimestamp1)
         AND (ctimestamp1 >= 0))
     OR (cfloat = 17))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
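Every regenerated plan in this file now opens with the same PLAN VECTORIZATION preamble because the tests were switched to the new explain mode. As a hedged sketch of how that mode is invoked (the table name and both property names are taken verbatim from the output above; the simplified SELECT list is illustrative only):

    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT AVG(ctinyint), MAX(cbigint)
    FROM alltypesorc
    WHERE cfloat = 17;

The preamble reports enabled plus the conditions met, and the EXPRESSION level additionally prints the per-operator predicateExpression/selectExpressions strings seen in these hunks.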
@@ -746,15 +863,33 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
                     predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
                     Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float)
                       outputColumnNames: ctinyint, cbigint, cint, cfloat
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 3, 2, 4]
                       Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                            vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                         Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
@@ -763,7 +898,20 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5)
@@ -843,7 +991,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool
 -- ArithmeticOps: Multiply, Subtract, Add, Divide
 -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cdouble,
        ctimestamp2,
        cstring1,
@@ -884,7 +1033,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool
 -- ArithmeticOps: Multiply, Subtract, Add, Divide
 -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cdouble,
        ctimestamp2,
        cstring1,
@@ -920,6 +1070,10 @@ WHERE (((cstring1 RLIKE 'a.*')
 ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
 LIMIT 50
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -936,31 +1090,73 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean)
                     Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18))
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30]
+                          selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(20,18)
                       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18))
                         sort order: +++++++++++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(20,18))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
                 Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 50
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
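In the plan just above, the reducer itself runs vectorized: its Reduce Vectorization block lists only conditions met and no notVectorizedReason, since a pure ORDER BY ... LIMIT reducer carries no struct-typed aggregate buffers. A minimal sketch of the two switches that block checks (both property names appear verbatim in the enableConditionsMet line; the engine requirement is the third listed condition):

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;
    -- per the plan, hive.execution.engine must also be tez or spark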
@@ -1104,7 +1300,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp
 -- ArithmeticOps: Divide, Remainder, Subtract, Multiply
 -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cbigint,
        cstring1,
        cboolean1,
@@ -1144,7 +1341,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp
 -- ArithmeticOps: Divide, Remainder, Subtract, Multiply
 -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cbigint,
        cstring1,
        cboolean1,
@@ -1179,6 +1377,10 @@ WHERE (((197 > ctinyint)
 ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15
 LIMIT 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1195,31 +1397,73 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
                     predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
+                          selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
                         sort order: +++++++++++++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
                 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 25
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
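Both ORDER BY ... LIMIT plans above give the VectorReduceSinkOperator the same verdict: native: false with nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false. The TopN condition appears to come from the LIMIT, which is also what puts the TopN Hash Memory Usage entry in the sink. A hedged sketch of the switch the nativeConditionsMet line is checking (property name copied from the output):

    SET hive.vectorized.execution.reducesink.new.enabled=true;
    -- the sink can still be non-native, as here, when a TopN is present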
@@ -1314,7 +1558,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp
 -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder
 -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cstring1,
        cboolean2,
        ctimestamp2,
@@ -1353,7 +1598,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp
 -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder
 -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
        cstring1,
        cboolean2,
        ctimestamp2,
@@ -1387,6 +1633,10 @@ WHERE (((csmallint > -26.28)
 ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
 LIMIT 75
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1403,32 +1653,74 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean
                     predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean)
                     Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28]
+                          selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(19,18), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long
                       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
                         sort order: +++++++++++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: boolean)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
                 Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 75
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1594,7 +1886,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp
 -- ArithmeticOps: Divide, Subtract, Multiply, Remainder
 -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring2,
        cdouble,
        cfloat,
@@ -1626,7 +1919,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp
 -- ArithmeticOps: Divide, Subtract, Multiply, Remainder
 -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like
 -- GroupBy: NoGroupByProjectColumns
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring2,
        cdouble,
        cfloat,
@@ -1653,6 +1947,10 @@ WHERE (((-1.389 >= cint)
 ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
 LIMIT 45
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
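The predicateExpression strings in these plans also show how type promotion is compiled: each CAST in the predicate becomes a child expression writing into a scratch column (12, 13, ...) which the comparison then reads. A hedged, stripped-down example built from one disjunct of the query that follows (the lone COUNT(*) is illustrative only):

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT COUNT(*)
    FROM alltypesorc
    WHERE -1.389 >= CAST(cint AS decimal(13,3));
    -- per the plan below, this comparison compiles to
    -- FilterDecimalScalarGreaterEqualDecimalColumn with a
    -- CastLongToDecimal child feeding scratch column 12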
@@ -1669,32 +1967,74 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean
                     predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean)
                     Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24]
+                          selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double
                       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double)
                         sort order: +++++++++++++++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: timestamp)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                 expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14]
                 Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 45
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1815,7 +2155,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long
 -- ArithmeticOps: Remainder, Divide, Subtract
 -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT csmallint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
        (csmallint % -75) as c1,
        STDDEV_SAMP(csmallint) as c2,
        (-1.389 / csmallint) as c3,
@@ -1840,7 +2181,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long
 -- ArithmeticOps: Remainder, Divide, Subtract
 -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT csmallint,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
        (csmallint % -75) as c1,
        STDDEV_SAMP(csmallint) as c2,
        (-1.389 / csmallint) as c3,
@@ -1860,6 +2202,10 @@ GROUP BY csmallint
 ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
 LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1877,15 +2223,34 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
                     Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint)
                       outputColumnNames: csmallint, cbigint, ctinyint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 3, 0]
                       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
+                        Group By Vectorization:
+                            aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            keyExpressions: col 1
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3]
+                            vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                         keys: csmallint (type: smallint)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -1897,7 +2262,20 @@ STAGE PLANS:
                           Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3)
@@ -1916,16 +2294,33 @@ STAGE PLANS:
                 TopN Hash Memory Usage: 0.1
         Reducer 3
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                 Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
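Unlike the earlier global aggregations, the query above groups by csmallint, so the map-side Group By Vectorization block gains a keyExpressions entry (col 1) alongside the aggregators. A hedged minimal reproduction, with COUNT(*) standing in for the full aggregate list:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT csmallint, COUNT(*)
    FROM alltypesorc
    GROUP BY csmallint;
    -- expected to show keyExpressions: col 1 under Group By Vectorization,
    -- with VectorUDAFCountStar(*) -> bigint as the sole aggregator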
@@ -2008,7 +2403,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp
 -- ArithmeticOps: Multiply, Add, Subtract, Remainder
 -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT cdouble,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
        VAR_SAMP(cdouble),
        (2563.58 * VAR_SAMP(cdouble)),
        (-(VAR_SAMP(cdouble))),
@@ -2040,7 +2436,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp
 -- ArithmeticOps: Multiply, Add, Subtract, Remainder
 -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual
 -- GroupBy: GroupBy
-EXPLAIN SELECT cdouble,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
        VAR_SAMP(cdouble),
        (2563.58 * VAR_SAMP(cdouble)),
        (-(VAR_SAMP(cdouble))),
@@ -2067,6 +2464,10 @@ WHERE (((cdouble > 2563.58))
 GROUP BY cdouble
 ORDER BY cdouble
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2084,15 +2485,34 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean)
                     Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), cfloat (type: float)
                       outputColumnNames: cdouble, cfloat
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 4]
                       Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            keyExpressions: col 5
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                            vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false
                         keys: cdouble (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2104,7 +2524,20 @@ STAGE PLANS:
                           Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5)
@@ -2123,13 +2556,27 @@ STAGE PLANS:
                 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
         Reducer 3
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13]
                 Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2206,7 +2653,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long
 -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder
 -- FilterOps: NotEqual, LessThan, Like, Equal, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring1,
        STDDEV_POP(cint) as c1,
        (STDDEV_POP(cint) * 10.175) as c2,
@@ -2266,7 +2714,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long
 -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder
 -- FilterOps: NotEqual, LessThan, Like, Equal, RLike
 -- GroupBy: GroupBy
-EXPLAIN SELECT ctimestamp1,
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
        cstring1,
        STDDEV_POP(cint) as c1,
        (STDDEV_POP(cint) * 10.175) as c2,
@@ -2321,6 +2770,10 @@ GROUP BY ctimestamp1, cstring1
 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37
 LIMIT 50
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
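Every reducer fallback in this file names the same cause: variance-family and AVG aggregates carry a struct-typed partial, and the reduce-side vectorizer rejects struct inputs, hence the repeated notVectorizedReason: ... Data type struct ... not supported. By contrast, the aggregators typed above as plain int/double/bigint (MAX, MIN, SUM, COUNT) keep primitive intermediates. A hedged illustration of the two classes, using only aggregates that appear in these plans:

    -- struct intermediate: the reducer falls back, as in the plans above
    SELECT ctimestamp1, STDDEV_POP(cint) FROM alltypesorc GROUP BY ctimestamp1;
    -- primitive intermediates, per the aggregator typing shown above
    SELECT ctimestamp1, MAX(cint), SUM(cbigint), COUNT(*) FROM alltypesorc GROUP BY ctimestamp1;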
+ Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 8, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2358,7 +2830,20 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), 
var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) @@ -2377,16 +2862,33 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2568,7 +3070,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2613,7 +3116,8 @@ 
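-- Aside for reviewers: the reducer fallback recorded in the hunks above is
-- easy to reproduce on any ORC-backed table; a minimal sketch, assuming a
-- hypothetical table stddev_demo (names illustrative, not part of this patch):
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
CREATE TABLE stddev_demo (cint INT, cstring1 STRING) STORED AS ORC;
EXPLAIN VECTORIZATION EXPRESSION
SELECT cstring1, STDDEV_POP(cint)
FROM stddev_demo
GROUP BY cstring1;
-- The map-side VectorGroupByOperator emits a STRUCT partial for stddev_pop,
-- so the merging group-by on the reducer stays row-mode, exactly as the
-- vectorOutputConditionsNotMet / notVectorizedReason lines above record.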
POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2653,6 +3157,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2670,15 +3178,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10 + native: 
false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2690,7 +3217,20 @@ STAGE PLANS: Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) @@ -2709,13 +3249,27 @@ STAGE PLANS: value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: 
double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24] Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2824,12 +3378,16 @@ create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2846,28 +3404,70 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true 
+ groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2889,12 +3489,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2911,30 +3515,72 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: 
mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3030,12 +3676,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3052,28 +3702,70 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: 
VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3095,12 +3787,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3117,30 +3813,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: 
false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3162,12 +3900,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3184,30 +3926,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3229,12 +4013,16 @@ POSTHOOK: type: QUERY 
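-- Aside for reviewers: these count plans pin down the count(*) vs.
-- count(column) split: VectorUDAFCountStar counts rows regardless of NULLs,
-- while VectorUDAFCount skips them, which is why the all-null table yields
-- 12288 for count(*) but 0 for every count(column). A hedged sketch
-- (null_demo is an illustrative name, not part of this patch):
CREATE TABLE null_demo (ctinyint TINYINT) STORED AS ORC;
INSERT INTO null_demo VALUES (NULL), (NULL), (NULL);
SELECT COUNT(*), COUNT(ctinyint) FROM null_demo;
-- expected: 3    0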
POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3251,30 +4039,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3296,12 +4126,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: 
explain vectorization expression select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3318,30 +4152,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 6) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3363,12 +4239,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 @@ -3385,30 +4265,72 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index a12ac05..d5178e9 100644 --- ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from 
vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -114,9 +118,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -124,19 +135,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -158,12 +187,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -178,9 +211,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -188,19 +228,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -227,7 +285,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box @@ -235,9 +293,13 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -252,9 +314,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -262,19 +331,37 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c06ea94..dfb3eee 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -48,21 +52,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -112,7 +139,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -130,7 +157,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -148,6 +175,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,21 +193,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 
10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 7c4467a..1d91b7b 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -25,18 +29,40 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: 
Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -51,12 +77,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -64,6 +101,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -71,9 +112,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 2, 12] + selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE @@ -82,9 +135,22 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out index 0a81f62..3ba67df 100644 --- ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -106,22 +106,68 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean + predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) + Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) 
(type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:Double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:String, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:String, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double + Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2048 Data size: 62872 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Select Operator - expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - ListSink + ListSink PREHOOK: query: select cdouble diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index a3d5947..9f256c4 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td 
from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -33,6 +37,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -52,6 +64,14 @@ STAGE PLANS: 0 _col0 (type: smallint) 1 _col0 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -107,10 +127,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 2ab2541..9a40169 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] PREHOOK: query: --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -132,7 +132,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: --1. 
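An aside on the two map-join plans above: the reducer vectorizes for sum() (vectorized_nested_mapjoin.q.out, Reducer 2: vectorized: true) but not for avg() (vectorized_mapjoin.q.out, Reducer 2: vectorized: false), because avg() carries a struct-typed partial aggregate and VectorGroupByOperator output must be a primitive type — exactly what the vectorOutputConditionsNotMet and notVectorizedReason lines report. A minimal probe against the same alltypesorc test table (a sketch of the expected flags, not the verbatim golden text):

set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;

-- primitive partial result: the reducer reports vectorized: true
explain vectorization select sum(cint) from alltypesorc;

-- struct partial result: the reducer reports vectorized: false, with a
-- notVectorizedReason naming the struct-typed avg parameter
explain vectorization select avg(cint) from alltypesorc;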
test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -142,6 +142,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,6 +174,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -221,6 +233,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -251,6 +268,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -373,7 +395,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -382,13 +404,17 @@ sort by j.p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 2. 
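The golden files in this patch exercise three spellings of the new clause: plain EXPLAIN VECTORIZATION (summary-level Map/Reduce Vectorization sections only, as in vectorized_nested_mapjoin.q.out), EXPLAIN VECTORIZATION EXPRESSION (adds per-operator predicateExpression/selectExpressions detail, as in vectorized_mapjoin.q.out and vectorized_math_funcs.q.out), and EXPLAIN VECTORIZATION EXTENDED (vectorization detail layered on the classic extended plan, as in this file). Side by side on one query — the modifier set shown is only what this diff itself demonstrates:

explain vectorization select count(*) from alltypesorc;            -- summaries only
explain vectorization expression select count(*) from alltypesorc; -- plus expression detail
explain vectorization extended select count(*) from alltypesorc;   -- plus classic extended output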
testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -422,6 +448,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -490,6 +524,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -541,6 +583,11 @@ STAGE PLANS: /part_orc [p2] Reducer 2 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -561,6 +608,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -591,6 +643,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -696,7 +753,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -704,12 +761,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 3. 
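test1 above establishes the pattern repeated throughout this file: the ORC scan side stays vectorized (Execution mode: vectorized; Map Vectorization ... vectorized: true) while every reducer that hosts a PTF or windowing operator falls back with notVectorizedReason: PTF Operator (PTF) not supported. A reduced illustration, assuming the part_orc test table loaded earlier in this file:

-- map work vectorizes; the reducer evaluating rank() reports vectorized: false
-- with notVectorizedReason: PTF Operator (PTF) not supported
explain vectorization
select p_mfgr, p_name,
       rank() over (partition by p_mfgr order by p_name) as r
from part_orc;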
testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -737,6 +798,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -788,6 +857,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -882,7 +956,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -894,7 +968,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -904,6 +978,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -932,6 +1010,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -983,6 +1069,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1013,6 +1104,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not 
supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1135,7 +1231,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1147,7 +1243,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1157,6 +1253,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1185,6 +1285,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1236,6 +1344,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1266,6 +1379,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1389,7 +1507,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1402,7 +1520,7 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY POSTHOOK: query: -- 6. 
testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1413,6 +1531,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1441,6 +1563,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1492,6 +1622,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1530,6 +1665,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -1655,7 +1795,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -1664,13 +1804,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 7. 
testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1699,6 +1843,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1767,6 +1919,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1818,6 +1978,11 @@ STAGE PLANS: /part_orc [p1] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -1852,6 +2017,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -1936,7 +2106,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -1945,13 +2115,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 8. 
testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1983,6 +2157,14 @@ STAGE PLANS: tag: 0 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2048,6 +2230,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2099,6 +2289,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -2136,6 +2331,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -2221,7 +2421,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -2230,13 +2430,17 @@ order by p_name, p_size desc) PREHOOK: type: QUERY POSTHOOK: query: -- 9. 
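Tests 2, 7 and 8 also surface the second reduce-side fallback in this file: a common (shuffle) join executed in a reducer needs input tagging, and the vectorizer rejects it with notVectorizedReason: Tez doesn't use tagging — the message names Tez even on these Spark plans because the check is shared between the two engines. A hedged way to reproduce it (hive.auto.convert.join is the standard map-join switch, not something this patch adds):

-- disabling map-join conversion forces a tagged reduce-side join,
-- whose reducer reports vectorized: false with the tagging reason
set hive.auto.convert.join=false;
explain vectorization
select p1.p_name
from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey;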
testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2279,6 +2483,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2330,6 +2540,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2360,6 +2575,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2463,7 +2683,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2474,7 +2694,7 @@ from noopwithmap(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 10. 
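Test 9 above is the first case in this file where the map side itself loses vectorization: noopwithmap runs a PTF stage inside the map task, so the Map Vectorization summary — not just the Reduce Vectorization one — carries notVectorizedReason: PTF Operator (PTF) not supported. The contrast, under the same part_orc assumptions:

-- noop(): PTF only on the reduce side; map work stays vectorized: true
explain vectorization
select p_mfgr, p_name, p_size
from noop(on part_orc partition by p_mfgr order by p_name);

-- noopwithmap(): a map-side PTF stage, so the map work also reports vectorized: false
explain vectorization
select p_mfgr, p_name, p_size
from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc);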
testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2483,6 +2703,10 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2526,6 +2750,12 @@ STAGE PLANS: tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2577,6 +2807,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2608,6 +2843,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2728,7 +2968,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2739,7 +2979,7 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 11. 
testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2748,6 +2988,10 @@ from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2776,6 +3020,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2827,6 +3079,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2857,6 +3114,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2977,7 +3239,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2989,7 +3251,7 @@ order by p_mfgr, p_name PREHOOK: type: QUERY POSTHOOK: query: -- 12. 
testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2999,6 +3261,10 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3028,6 +3294,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3079,6 +3353,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3132,6 +3411,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3170,6 +3454,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3292,7 +3581,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3307,7 +3596,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 PREHOOK: type: QUERY POSTHOOK: query: -- 13. 
testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3320,6 +3609,10 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3348,6 +3641,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3399,6 +3700,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3429,6 +3735,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3549,7 +3860,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3563,7 +3874,7 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 14. 
testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3575,6 +3886,10 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3604,6 +3919,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3672,6 +3995,14 @@ STAGE PLANS: tag: 1 auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3723,6 +4054,11 @@ STAGE PLANS: /part_orc [p1] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3757,6 +4093,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: true + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -3777,6 +4118,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3918,7 +4264,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -3926,12 +4272,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 15. 
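Looking back to vectorized_math_funcs.q.out and the CASE WHEN plan at the top of this section: functions without a dedicated vectorized class — log with an arbitrary base, power, hex, conv, and the CASE WHEN forms — run through VectorUDFAdaptor, which keeps the operator vectorized but flips the Map Vectorization summary to allNative: false and usesVectorUDFAdaptor: true. A small probe, again assuming alltypesorc:

-- log2 has a native vectorized class (FuncLog2DoubleToDouble); log(2, x), power
-- and hex appear as VectorUDFAdaptor(...) in selectExpressions, and the summary
-- reports usesVectorUDFAdaptor: true
explain vectorization expression
select log2(cdouble), log(2, cdouble), power(log2(cdouble), 2), hex(cdouble)
from alltypesorc;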
testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3960,6 +4310,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4011,6 +4369,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -4050,6 +4413,13 @@ STAGE PLANS: Reducer 3 Execution mode: vectorized Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -4144,7 +4514,7 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part_orc POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4152,7 +4522,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4160,6 +4530,10 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4198,6 +4572,14 @@ STAGE PLANS: value expressions: _col2 (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4249,6 +4631,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -4281,6 +4668,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) @@ -4421,7 +4813,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4437,7 +4829,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -4453,6 +4845,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -4487,6 +4883,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4538,6 +4942,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4614,6 +5023,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -4654,6 +5068,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce 
Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -4737,6 +5156,11 @@ STAGE PLANS: MultiFileSpray: false Reducer 6 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4765,6 +5189,11 @@ STAGE PLANS: auto parallelism: false Reducer 7 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4974,7 +5403,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -4993,7 +5422,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5010,6 +5439,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5039,6 +5472,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5090,6 +5531,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5150,6 +5596,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5188,6 +5639,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5324,7 +5780,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5343,7 +5799,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5360,6 +5816,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5390,6 +5850,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5441,6 +5909,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5478,6 +5951,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5508,6 +5986,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5538,6 +6021,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + 
enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5674,7 +6162,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5691,7 +6179,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5706,6 +6194,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5735,6 +6227,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5786,6 +6286,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5823,6 +6328,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5860,6 +6370,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5992,7 +6507,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. 
testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6011,7 +6526,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6028,6 +6543,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6058,6 +6577,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6109,6 +6636,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6146,6 +6678,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6192,6 +6729,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6223,6 +6765,11 @@ STAGE PLANS: auto parallelism: false Reducer 5 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6359,7 +6906,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6377,7 +6924,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6393,6 +6940,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6422,6 +6973,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6473,6 +7032,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6526,6 +7090,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6564,6 +7133,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6698,7 +7272,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6714,7 +7288,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6728,6 +7302,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6757,6 +7335,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6808,6 +7394,11 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6861,6 +7452,11 @@ STAGE PLANS: auto parallelism: false Reducer 3 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6892,6 +7488,11 @@ STAGE PLANS: auto parallelism: false Reducer 4 Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 535da25..c9b04d3 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,38 +32,91 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 
2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -83,6 +140,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -96,13 +158,27 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out index bfac939..1e3be19 100644 --- ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. 
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,22 +46,53 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) + Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - ListSink + ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 5eb896f..594225a 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: 
type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -115,25 +119,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, 
spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -216,7 +255,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -229,7 +268,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -242,6 +281,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -258,25 +301,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -359,7 +437,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -372,7 +450,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -385,6 +463,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -401,25 +483,60 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) 
(type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -505,7 +622,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -519,7 +636,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -532,6 +649,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -548,25 +669,60 @@ STAGE PLANS: TableScan alias: alltypesorc_wrong Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + 
native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -612,20 +768,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -642,30 +802,72 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, 
VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -698,15 +900,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -736,20 +942,47 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -775,7 +1008,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -786,7 +1019,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -797,6 +1030,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -813,12 +1050,26 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true 
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: ctimestamp1 (type: timestamp)
                     outputColumnNames: ctimestamp1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                       Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
@@ -827,7 +1078,20 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
diff --git ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
index 8ce85f6..ca7b93e 100644
--- ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
+++ ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
@@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5
-PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
diff --git ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out
index 4535e66..7939a8a 100644
--- ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out
+++ ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out
@@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part
 POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@non_string_part
 ctinyint=__HIVE_DEFAULT_PARTITION__
-PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -48,32 +52,73 @@ STAGE PLANS:
                 TableScan
                   alias: non_string_part
                   Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                     predicate: (cint > 0) (type: boolean)
                     Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: cint (type: int), ctinyint (type: tinyint)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 4]
                       Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col1 (type: tinyint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                Limit
                  Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -106,10 +151,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
 799471	NULL
 1248059	NULL
 1286921	NULL
-PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,31 +176,72 @@ STAGE PLANS:
                 TableScan
                   alias: non_string_part
                   Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                     predicate: (cint > 0) (type: boolean)
                     Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cstring1 (type: string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
                       Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/tez/vectorization_div0.q.out ql/src/test/results/clientpositive/tez/vectorization_div0.q.out
index 12b90a4..1d52b7b 100644
--- ql/src/test/results/clientpositive/tez/vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/tez/vectorization_div0.q.out
@@ -1,27 +1,72 @@
 PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Select Operator
+                    expressions: (cdouble / 0.0) (type: double)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [12]
+                        selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double
+                    Statistics: Num rows: 12288 Data size: 98304 Basic stats: COMPLETE Column stats: COMPLETE
+                    Limit
+                      Number of rows: 100
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
+                      Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: 100
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Select Operator
-            expressions: (cdouble / 0.0) (type: double)
-            outputColumnNames: _col0
-            Limit
-              Number of rows: 100
-              ListSink
+        ListSink
 
 PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100
 PREHOOK: type: QUERY
@@ -134,17 +179,21 @@ NULL
 PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) 
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) 
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -162,32 +211,74 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean
                     predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
                     Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [12, 15, 17]
+                          selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21)
                       Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint), _col1 (type: double)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: decimal(22,21))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21))
                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2]
                Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -312,16 +403,20 @@ POSTHOOK: Input: default@alltypesorc
 60330397	NULL	0.000000019890470801974
 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) 
 from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) 
 from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -339,32 +434,74 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean
                     predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
                     Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col4, _col5
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [12, 15, 16, 14, 17]
+                          selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double
                       Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: double), _col1 (type: double)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 1, 3, 4]
                Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index 71e470b..a3be8e5 100644
--- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -1,8 +1,12 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
 PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -40,14 +44,18 @@ POSTHOOK: Input: default@alltypesorc
 -1887561756	9531.0
 PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -66,31 +74,67 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(column 0) -> boolean
                     predicate: ctinyint is not null (type: boolean)
                     Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: double)
                         sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                         Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.3
                         value expressions: _col2 (type: smallint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
                Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -132,13 +176,17 @@ POSTHOOK: Input: default@alltypesorc
 -64	-1600.0	-1600
 -64	-200.0	-200
 PREHOOK: query: -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -159,9 +207,20 @@ STAGE PLANS:
                   Select Operator
                     expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        selectExpressions: DoubleColAddDoubleScalar(column 5, value 1.0) -> 12:double
                     Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: avg(_col1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFAvgDouble(column 12:double)
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          keyExpressions: column 0:tinyint
+                          native: false
+                          vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(column 12:double) output type STRUCT requires PRIMITIVE IS false
                       keys: _col0 (type: tinyint)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -174,7 +233,20 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.3
                         value expressions: _col1 (type: struct)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0)
@@ -228,13 +300,17 @@ NULL	9370.0945309795
 -47	-574.6428571428571
 -46	3033.55
 PREHOOK: query: -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -255,8 +331,16 @@ STAGE PLANS:
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: column 0:tinyint
+                          native: false
                       keys: ctinyint (type: tinyint)
                       mode: hash
                       outputColumnNames: _col0
@@ -265,22 +349,53 @@ STAGE PLANS:
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false
                         Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.3
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: column 0:tinyint
+                    native: false
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -321,12 +436,16 @@ NULL
 -48
 -47
 -46
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -347,8 +466,16 @@ STAGE PLANS:
                   Select Operator
                     expressions: ctinyint (type: tinyint), cdouble (type: double)
                     outputColumnNames: ctinyint, cdouble
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
                     Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: column 0:tinyint, column 5:double
+                          native: false
                       keys: ctinyint (type: tinyint), cdouble (type: double)
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -357,27 +484,64 @@ STAGE PLANS:
                         key expressions: _col0 (type: tinyint), _col1 (type: double)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: column 0:tinyint, column 1:double
+                    native: false
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(_col1)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(column 1:double)
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: column 0:tinyint
+                      native: false
                  keys: _col0 (type: tinyint)
                  mode: complete
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 20
+                    Limit Vectorization:
+                        className: VectorLimitOperator
+                        native: true
                    Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -419,13 +583,17 @@ NULL	2932
 -47	22
 -46	24
 PREHOOK: query: -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -445,13 +613,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 PREHOOK: query: -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -471,14 +643,27 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(column 0) -> boolean
                     predicate: ctinyint is not null (type: boolean)
                     Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cdouble (type: double), ctinyint (type: tinyint)
                       outputColumnNames: cdouble, ctinyint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(column 0:tinyint)
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: column 5:double
+                            native: false
                         keys: cdouble (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -487,14 +672,39 @@ STAGE PLANS:
                           key expressions: _col0 (type: double)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: double)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkMultiKeyOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(column 1:bigint)
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: column 0:double
+                    native: false
                keys: KEY._col0 (type: double)
                mode: mergepartial
                outputColumnNames: _col0, _col1
@@ -502,20 +712,41 @@ STAGE PLANS:
                Reduce Output Operator
                  key expressions: _col1 (type: bigint), _col0 (type: double)
                  sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.3
        Reducer 3
            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
                Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                  Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
index bacb3bb..9f239b9 100644
--- ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
+++ ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
@@ -100,20 +100,24 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@count_case_groupby
 POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
 POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -139,6 +143,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=none
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -163,20 +173,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 true	true	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -202,6 +216,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_extract (Column[c2], Const string val_([0-9]+), Const int 1) because hive.vectorized.adaptor.usage.mode=none
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -226,20 +246,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 238	238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
 regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
 regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -265,6 +289,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_replace (Column[c2], Const string val, Const string replaced) because hive.vectorized.adaptor.usage.mode=none
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -289,20 +319,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 replaced_238	replaced_238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -328,6 +362,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -352,20 +392,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 true	true	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -377,21 +421,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 6:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:String) -> 8:boolean
              Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
              Limit
                Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -416,20 +482,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 238	238	true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
 regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
 regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -441,21 +511,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:String) -> 8:boolean
              Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
              Limit
                Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                  Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -480,10 +572,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 replaced_238	replaced_238	true
-PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -506,6 +602,12 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=none
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -559,20 +661,24 @@ POSTHOOK: Input: default@decimal_udf
 9.8596
 9.8596
 NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key),
 log(key), log(key, key), log(key, value), log(value, key),
 log10(key), sqrt(key)
 FROM DECIMAL_UDF WHERE key = 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT exp(key), ln(key),
 log(key), log(key, key), log(key, value), log(value, key),
 log10(key), sqrt(key)
 FROM DECIMAL_UDF WHERE key = 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -598,6 +704,12 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=none
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -622,10 +734,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
 #### A masked pattern was here ####
 22026.465794806718	2.302585092994046	2.302585092994046	1.0	1.0	1.0	1.0	3.1622776601683795
-PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -648,6 +764,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones + vectorized: false Stage: Stage-0 Fetch Operator @@ -701,20 +823,24 @@ POSTHOOK: Input: default@decimal_udf 9.8596 9.8596 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -740,6 +866,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones + vectorized: false Stage: Stage-0 Fetch Operator @@ -764,12 +896,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -797,6 +933,16 @@ STAGE PLANS: Map-reduce partition columns: 
_col0 (type: string) Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFWhen(Column[bool], Const int 1, GenericUDFOPNot(Column[bool]), Const int 0, Const void null) because hive.vectorized.adaptor.usage.mode=none + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -831,12 +977,16 @@ key2 1 key3 0 key4 1 key5 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -848,12 +998,27 @@ STAGE PLANS: TableScan alias: count_case_groupby Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] + selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:Long Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -862,9 +1027,26 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 19f8093..337e554 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,12 +122,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: dc (type: decimal(38,18)) outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +150,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator 
aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index 3043a6c..265f429 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -33,10 +33,14 @@ POSTHOOK: Output: default@testvec POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY -POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,6 +69,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) diff --git ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out index 9837b26..bf3acb1 100644 --- ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out @@ -43,17 +43,21 @@ POSTHOOK: Output: default@tbl2 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of sub-query. 
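A note on the vector_aggregate_9 hunk above: avg(dc) over decimal(38,18) is the one aggregate there whose map-side partial result is a struct (a count plus an intermediate sum), which is why Group By Vectorization reports vectorOutput: false with "output type STRUCT requires PRIMITIVE IS false" and the summary shows groupByVectorOutput: false, even though min/max/sum alone have primitive outputs. A sketch of the query that triggers it, assuming only the SET line (the query itself is from this diff):

    SET hive.vectorized.execution.enabled=true;

    -- min/max/sum keep primitive outputs; adding avg(dc) forces the struct
    -- intermediate, so VectorGroupByOperator emits row-mode output here.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT min(dc), max(dc), sum(dc), avg(dc) FROM vectortab2korc;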
It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,12 +69,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -78,14 +93,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -120,7 +160,7 @@ POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 22 PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select key, count(*) from @@ -131,7 +171,7 @@ select count(*) from ) subq2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of more than one sub-query. 
It should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select key, count(*) from @@ -141,6 +181,10 @@ select count(*) from group by key ) subq2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -153,12 +197,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -167,7 +222,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -175,7 +238,24 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -200,6 +280,14 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -245,7 +333,7 @@ POSTHOOK: Input: default@tbl2 6 PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. 
-explain +explain vectorization expression select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -264,7 +352,7 @@ on src1.key = src2.key PREHOOK: type: QUERY POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization expression select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -281,6 +369,10 @@ join ) src2 on src1.key = src2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -294,12 +386,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -308,8 +411,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -317,8 +429,25 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -347,6 +476,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -371,12 +507,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -385,8 +532,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -394,8 +550,25 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -461,7 +634,7 @@ POSTHOOK: Input: default@tbl2 9 1 1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -470,13 +643,17 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. 
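All of the vector_auto_smb_mapjoin_14.q.out plans in this stretch share one pattern: the map side vectorizes (vectorized: true, allNative: false), but the Sorted Merge Bucket Map Join itself runs through VectorSMBMapJoinOperator with native: false, and the MR engine keeps the reduce side in row mode. A sketch of the setup such a test presumes — the DDL details (bucket count, ORC storage) and the SET lines are assumptions for illustration, not taken from this hunk:

    -- Bucketed, sorted tables are the precondition for SMB conversion.
    CREATE TABLE tbl1 (key int, value string)
        CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
    CREATE TABLE tbl2 (key int, value string)
        CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;

    -- Switches that let the optimizer pick a sort-merge bucket map join.
    SET hive.auto.convert.sortmerge.join=true;
    SET hive.optimize.bucketmapjoin=true;
    SET hive.optimize.bucketmapjoin.sortedmerge=true;
    SET hive.vectorized.execution.enabled=true;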
-explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -488,12 +665,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 6) -> boolean predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -501,14 +689,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -548,7 +761,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization expression select count(*) from ( select * from @@ -562,7 +775,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization expression select count(*) from ( select * from @@ -574,6 +787,10 @@ select count(*) from join tbl2 b on subq2.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -585,12 +802,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColLessLongScalar(col 0, val 8) -> boolean, FilterLongColLessLongScalar(col 0, val 6) -> boolean) -> boolean predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -598,14 +826,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -655,7 +908,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select * from @@ -676,7 +929,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. 
-- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select * from @@ -695,6 +948,10 @@ select count(*) from ) subq4 on subq2.key = subq4.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -706,12 +963,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColLessLongScalar(col 0, val 8) -> boolean, FilterLongColLessLongScalar(col 0, val 6) -> boolean) -> boolean predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -719,14 +987,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -789,7 +1082,7 @@ POSTHOOK: Input: default@tbl1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -799,13 +1092,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. 
Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization expression select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -817,12 +1114,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 8) -> boolean predicate: (key < 8) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -830,14 +1138,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -877,7 +1210,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side -- join should be performed -explain +explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -886,13 +1219,17 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side -- join should be performed -explain +explain vectorization expression select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as 
key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -932,6 +1269,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -960,6 +1304,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1001,18 +1353,22 @@ POSTHOOK: Input: default@tbl2 22 PREHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. 
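The repeated Reduce Sink Vectorization blocks in these plans make the engine dependency explicit: every condition for the new native vectorized sink is met except "hive.execution.engine mr IN [tez, spark]" and "Uniform Hash", so the operator falls back to the non-native VectorReduceSinkOperator, and Reduce Vectorization stays disabled for the same engine reason. A hedged sketch of the knobs those condition strings refer to (the first two values are the ones the output reports as met; the tez line is an assumption about what would flip the result):

    -- Reported as met in the plans above:
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;

    -- Reported as not met: MR is neither tez nor spark.  Re-running the
    -- same q-file with the line below would be expected to vectorize the
    -- reduce side and allow a native sink, other conditions permitting.
    SET hive.execution.engine=tez;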
-explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1024,12 +1380,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 6) -> boolean predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1037,14 +1404,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1080,7 +1472,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to to a sort-merge join -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1092,7 +1484,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. 
-- It should be converted to to a sort-merge join -explain +explain vectorization expression select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1102,6 +1494,10 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1113,12 +1509,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 6) -> boolean predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1128,14 +1535,39 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1181,7 +1613,7 @@ POSTHOOK: Input: default@tbl2 56 PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1196,7 +1628,7 @@ on subq2.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. 
-- The join should be converted to a sort-merge join -explain +explain vectorization expression select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1209,6 +1641,10 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1220,12 +1656,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColLessLongScalar(col 0, val 8) -> boolean, FilterLongColLessLongScalar(col 0, val 6) -> boolean) -> boolean predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1233,14 +1680,39 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1308,7 +1780,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to -- a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1317,13 +1789,17 @@ insert overwrite table dest2 select key, val1, val2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert. 
It should be converted to -- a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1348,7 +1824,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -1358,14 +1841,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1373,12 +1870,23 @@ STAGE PLANS: name: default.dest1 File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-8 Conditional Operator @@ -1587,7 +2095,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. -- It should be converted to a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1596,13 +2104,17 @@ insert overwrite table dest2 select key, count(*) group by key PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. 
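Two granularities of the new EXPLAIN syntax appear across these golden files: vector_aggregate_without_gby.q.out uses plain EXPLAIN VECTORIZATION, which adds only the PLAN VECTORIZATION header and per-task Map/Reduce Vectorization summaries, while the other files use EXPLAIN VECTORIZATION EXPRESSION, which additionally annotates each operator with its vector class and expressions. Both forms, with queries taken from this diff:

    -- Summary only: no per-operator vectorization blocks.
    EXPLAIN VECTORIZATION
    SELECT max(dt), max(greg_dt) FROM testvec WHERE id = 5;

    -- Expression level: adds blocks such as "Select Vectorization:" with
    -- selectExpressions and vector expression class names.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT min(dc), max(dc), sum(dc), avg(dc) FROM vectortab2korc;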
-- It should be converted to a sort-merge join -explain +explain vectorization expression from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1622,7 +2134,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -1632,8 +2151,14 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1642,8 +2167,18 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -1651,8 +2186,25 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index 0b9401d..7f25903 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -108,9 +112,16 @@ STAGE PLANS: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -118,20 +129,41 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Not empty key IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 5] + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work @@ -179,13 +211,17 @@ tint.rnum tsint.rnum 
tint.cint tsint.csint between_col 4 3 10 1 NoOk 4 4 10 10 Ok Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -218,9 +254,16 @@ STAGE PLANS: TableScan alias: tsint Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: rnum (type: int), csint (type: smallint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -228,23 +271,47 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Not empty key IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> boolean predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3] Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_between_in.q.out ql/src/test/results/clientpositive/vector_between_in.q.out index 9f351b2..eab46a5 100644 --- 
ql/src/test/results/clientpositive/vector_between_in.q.out +++ ql/src/test/results/clientpositive/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,18 +31,46 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) @@ -58,10 +90,14 @@ STAGE PLANS: Processor Tree: 
ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -73,21 +109,55 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -108,10 +178,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 
cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,18 +197,46 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) @@ -154,10 +256,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -169,21 +275,55 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -204,10 +344,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -219,18 +363,46 @@ STAGE PLANS: TableScan alias: decimal_date_test 
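The integer constants in the vectorized date predicates here — FilterLongColumnInList(col 3, values [-67, -171]), FilterLongColumnBetween(col 3, left -2, right 1), FilterLongColumnNotBetween(col 3, left -610, right 608) — are the date literals constant-folded to days since 1970-01-01, the same representation the date column carries inside the long vector. A quick check with java.time (plain JDK, not Hive code) reproduces each value:

import java.time.LocalDate;

// Reproduce the epoch-day constants that appear in the vectorized
// date filters of vector_between_in.q.out.
public class DateLiteralToEpochDay {
  public static void main(String[] args) {
    System.out.println(LocalDate.parse("1969-10-26").toEpochDay()); // -67
    System.out.println(LocalDate.parse("1969-07-14").toEpochDay()); // -171
    System.out.println(LocalDate.parse("1970-01-21").toEpochDay()); // 20
    System.out.println(LocalDate.parse("1969-12-30").toEpochDay()); // -2
    System.out.println(LocalDate.parse("1970-01-02").toEpochDay()); // 1
    System.out.println(LocalDate.parse("1968-05-01").toEpochDay()); // -610
    System.out.println(LocalDate.parse("1971-09-01").toEpochDay()); // 608
  }
}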
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) @@ -250,10 +422,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -265,18 +441,46 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) @@ -296,10 +500,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -311,18 +519,46 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) @@ -342,10 +578,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -357,21 +597,55 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -639,12 +913,16 @@ POSTHOOK: Input: default@decimal_date_test 6172 PREHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY POSTHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -656,12 +934,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -670,9 +963,26 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -694,10 +1004,14 @@ 
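The "-- projections" tests below expose an asymmetry worth noting. In a WHERE clause, IN and BETWEEN compile to dedicated filter expressions (FilterLongColumnInList, FilterDecimalColumnBetween) that shrink the selected-row set, and a projected IN still gets a native expression (LongColumnInList(col 3, ...) -> 4:boolean). A projected BETWEEN, by contrast, has no native projection form in these plans and falls back to VectorUDFAdaptor, flipping the summary's usesVectorUDFAdaptor to true. A simplified sketch of the two contracts, with hypothetical signatures rather than Hive's actual classes:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Illustrative sketch of the two vector-expression contracts:
// filter mode (WHERE) compacts the selected indices; projection mode
// (SELECT) materializes a 0/1 long scratch column.
public class InListModes {

  // Filter mode: compact sel in place, return the new batch size.
  static int filterInList(long[] col, int[] sel, int n, Set<Long> values) {
    int newSize = 0;
    for (int j = 0; j < n; j++) {
      int i = sel[j];
      if (values.contains(col[i])) {
        sel[newSize++] = i;
      }
    }
    return newSize;
  }

  // Projection mode: write a boolean-as-long output column.
  static void projectInList(long[] col, long[] out, int[] sel, int n, Set<Long> values) {
    for (int j = 0; j < n; j++) {
      int i = sel[j];
      out[i] = values.contains(col[i]) ? 1 : 0;
    }
  }

  public static void main(String[] args) {
    long[] cdate = {-67, -171, 20, 0, 5};              // epoch days
    int[] sel = {0, 1, 2, 3, 4};
    Set<Long> in = new HashSet<>(Arrays.asList(-67L, -171L));

    long[] projected = new long[cdate.length];
    projectInList(cdate, projected, sel, sel.length, in);
    System.out.println(Arrays.toString(projected));    // [1, 1, 0, 0, 0]

    int newSize = filterInList(cdate, sel, sel.length, in);
    System.out.println(newSize);                       // 2 rows survive
  }
}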
STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -709,12 +1023,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -723,9 +1052,26 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -747,10 +1093,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: 
query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -762,12 +1112,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -776,9 +1141,26 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -800,10 +1182,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 
4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -815,12 +1201,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 4 + native: false + projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -829,9 +1230,26 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 2110d43..ea555aa 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type: POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) 
FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -168,8 +172,18 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary + vectorized: false Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -201,16 +215,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### -27832781952 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -222,12 +240,26 @@ STAGE PLANS: TableScan alias: hundredorc Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: bin (type: binary) outputColumnNames: bin + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 10 + native: false + projectedOutputColumns: [0] keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 @@ -236,9 +268,26 @@ STAGE PLANS: key expressions: _col0 (type: binary) sort order: + Map-reduce partition columns: _col0 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -304,16 +353,20 @@ POSTHOOK: Input: default@hundredorc 3 zync studies PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -349,12 +402,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -362,20 +426,40 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_bround.q.out ql/src/test/results/clientpositive/vector_bround.q.out index f92d749..dc0326a 100644 --- ql/src/test/results/clientpositive/vector_bround.q.out +++ ql/src/test/results/clientpositive/vector_bround.q.out @@ -34,10 +34,14 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_vector_bround POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY -POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -61,6 +65,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out index 7a6a4da..4f47c3a 100644 --- ql/src/test/results/clientpositive/vector_bucket.q.out +++ ql/src/test/results/clientpositive/vector_bucket.q.out @@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@non_orc_table -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,6 +37,14 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 5cd8085..6680550 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -123,12 +127,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -141,6 +160,18 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -165,6 +196,14 @@ STAGE PLANS: Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index dfe545b..732ee52 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -47,18 +47,22 @@ val_10 10 1 val_100 200 2 val_103 206 2 val_104 208 2 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -71,12 +75,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:Long Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -85,10 +104,27 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -113,6 +149,14 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -179,18 +223,22 @@ val_97 194 2 val_96 96 1 val_95 190 2 val_92 92 1 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -203,12 +251,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:Long Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -217,10 +280,27 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - 
Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -245,6 +325,14 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out index 58988bf..1c58fd2 100644 --- ql/src/test/results/clientpositive/vector_char_4.q.out +++ ql/src/test/results/clientpositive/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -144,12 +148,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: 
char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out index 6528f75..af01b41 100644 --- ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@char_join1_str_orc POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -165,12 +169,23 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: 
true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -178,16 +193,38 @@ STAGE PLANS: keys: 0 _col1 (type: char(10)) 1 _col1 (type: char(10)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) @@ -225,11 +262,15 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -265,12 +306,23 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: c1 (type: int), c2 (type: char(20)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -278,16 +330,38 @@ STAGE PLANS: keys: 0 _col1 (type: char(20)) 1 _col1 (type: char(20)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) @@ -327,11 +401,15 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -367,12 +445,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is 
not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -380,16 +469,38 @@ STAGE PLANS: keys: 0 UDFToString(_col1) (type: string) 1 _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out index e1dedae..fb6c503 100644 --- ql/src/test/results/clientpositive/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,42 +66,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: + - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -139,16 +123,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -156,42 +144,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: - - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -236,12 +204,16 @@ create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,55 +223,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - Select Operator 
- expressions: CAST( _col0 AS CHAR(12) (type: char(12)) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-2 - Stats-Aggr Operator PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out index 0101b66..c4556d7 100644 --- ql/src/test/results/clientpositive/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -8,12 +8,16 @@ LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,43 +26,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: +++++ - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 1, 16] + selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:String, CastLongToString(col 1) -> 15:String) -> 16:string + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc @@ -86,18 +87,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,43 +111,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - 
Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: +++ - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 0) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 15] + selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -170,18 +172,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, 
cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,40 +196,39 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -251,18 +256,22 @@ NULL NULL 0.0 
NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -271,43 +280,40 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: 
_col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc @@ -335,18 +341,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -355,40 +365,39 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
-        Select Operator
-          expressions: null (type: float), null (type: bigint), null (type: float)
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-          Limit
-            Number of rows: 10
-            Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: 10
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c
 FROM alltypesorc
@@ -416,16 +425,20 @@ NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
-PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
+PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
 FROM alltypesorc
 WHERE cbigint IS NULL
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
+POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
 FROM alltypesorc
 WHERE cbigint IS NULL
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -434,33 +447,36 @@ STAGE PLANS:
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
-          TableScan
-            alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE
-            Filter Operator
-              predicate: cbigint is null (type: boolean)
-              Statistics: Num rows: 3115 Data size: 27912 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 3115 Data size: 21772 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+          Filter Vectorization:
+              className: VectorFilterOperator
+              native: true
+              predicateExpression: SelectColumnIsNull(col 3) -> boolean
+          Select Vectorization:
+              className: VectorSelectOperator
+              native: true
+              projectedOutputColumns: [12, 0, 14]
+              selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint
+          Limit Vectorization:
+              className: VectorLimitOperator
+              native: true
+          File Sink Vectorization:
+              className: VectorFileSinkOperator
+              native: false
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
-      limit: 10
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c
 FROM alltypesorc
diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out
index 56d39d9..63fab1f 100644
--- ql/src/test/results/clientpositive/vector_coalesce_2.q.out
+++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out
@@ -16,18 +16,22 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@str_str_orc
 POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -96,14 +100,18 @@ POSTHOOK: Input: default@str_str_orc
 #### A masked pattern was here ####
 X	0.02
 y	0.0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -147,18 +155,22 @@ POSTHOOK: Input: default@str_str_orc
 0
 1
 0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
 from str_str_orc
 GROUP BY str2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -170,12 +182,27 @@ STAGE PLANS:
           TableScan
             alias: str_str_orc
             Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
              outputColumnNames: _col0, _col1
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [1, 4]
+                 selectExpressions: VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:Long
              Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(_col1)
+               Group By Vectorization:
+                   aggregators: VectorUDAFSumLong(col 4) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 1
+                   native: false
+                   projectedOutputColumns: [0]
                keys: _col0 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
@@ -184,9 +211,26 @@ STAGE PLANS:
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
@@ -228,14 +272,18 @@ POSTHOOK: Input: default@str_str_orc
 #### A masked pattern was here ####
 X	0.02
 y	0.0
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT COALESCE(str1, 0) as result from str_str_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -247,18 +295,37 @@ STAGE PLANS:
           TableScan
             alias: str_str_orc
             Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: COALESCE(str1,0) (type: string)
              outputColumnNames: _col0
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [3]
+                 selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string
              Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+               File Sink Vectorization:
+                   className: VectorFileSinkOperator
+                   native: false
                Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_complex_all.q.out ql/src/test/results/clientpositive/vector_complex_all.q.out
index a54a371..f318ad3 100644
--- ql/src/test/results/clientpositive/vector_complex_all.q.out
+++ ql/src/test/results/clientpositive/vector_complex_all.q.out
@@ -62,14 +62,18 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create
 POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ]
 orc_create_staging.str	orc_create_staging.mp	orc_create_staging.lst	orc_create_staging.strct
 PREHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT * FROM orc_create_complex
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT * FROM orc_create_complex
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -92,6 +96,12 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Data type map of Column[mp] not supported
+          vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -112,14 +122,18 @@ line1	{"key13":"value13","key11":"value11","key12":"value12"}	["a","b","c"]	{"a"
 line2	{"key21":"value21","key22":"value22","key23":"value23"}	["d","e","f"]	{"a":"three","b":"four"}
 line3	{"key31":"value31","key32":"value32","key33":"value33"}	["g","h","i"]	{"a":"five","b":"six"}
 PREHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT COUNT(*) FROM orc_create_complex
 PREHOOK: type: QUERY
 POSTHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT COUNT(*) FROM orc_create_complex
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -131,18 +145,48 @@ STAGE PLANS:
           TableScan
             alias: orc_create_complex
             Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: []
              Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                aggregations: count()
+               Group By Vectorization:
+                   aggregators: VectorUDAFCountStar(*) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   native: false
+                   projectedOutputColumns: [0]
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  sort order: 
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -174,14 +218,18 @@ POSTHOOK: Input: default@orc_create_complex
 c0
 3
 PREHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT str FROM orc_create_complex ORDER BY str
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT str FROM orc_create_complex ORDER BY str
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -193,15 +241,39 @@ STAGE PLANS:
           TableScan
             alias: orc_create_complex
             Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: str (type: string)
              outputColumnNames: _col0
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0]
              Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: string)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string)
diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out
index 002cdeb..d428849 100644
--- ql/src/test/results/clientpositive/vector_complex_join.q.out
+++ ql/src/test/results/clientpositive/vector_complex_join.q.out
@@ -21,13 +21,17 @@ POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.a SIMPLE []
 POSTHOOK: Lineage: test.b EXPRESSION []
 c0	c1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from alltypesorc join test where alltypesorc.cint=test.a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from alltypesorc join test where alltypesorc.cint=test.a
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -85,6 +89,12 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported
+          vectorized: false
       Local Work:
         Map Reduce Local Work
@@ -142,13 +152,17 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test2b
 POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 _col0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from test2b join test2a on test2b.a = test2a.a[1]
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from test2b join test2a on test2b.a = test2a.a[1]
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -202,6 +216,12 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: UDF GenericUDFIndex(Column[a], Const int 1) not supported
+          vectorized: false
       Local Work:
         Map Reduce Local Work
diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out
index 734ef39..963d46a 100644
--- ql/src/test/results/clientpositive/vector_count.q.out
+++ ql/src/test/results/clientpositive/vector_count.q.out
@@ -47,10 +47,14 @@ POSTHOOK: Input: default@abcd
 12	100	75	7
 12	NULL	80	2
 NULL	35	23	6
-PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -62,12 +66,26 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: a (type: int), b (type: int), c (type: int), d (type: int)
              outputColumnNames: a, b, c, d
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3]
              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0, col 1, col 2
+                   native: false
+                   projectedOutputColumns: [0, 1, 2]
                keys: a (type: int), b (type: int), c (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -76,9 +94,26 @@ STAGE PLANS:
                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: int)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col5 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2)
@@ -112,10 +147,14 @@ POSTHOOK: Input: default@abcd
 100	1	1	3
 12	1	2	9
 NULL	1	1	6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,12 +166,26 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: a (type: int), b (type: int), c (type: int), d (type: int)
              outputColumnNames: _col1, _col2, _col3, _col4
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3]
              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0, col 1, col 2, col 3
+                   native: false
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -140,9 +193,26 @@ STAGE PLANS:
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                  sort order: ++++
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
@@ -172,10 +242,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7	7	6	6	6	7	3	3	6	7	4	5	6	6	5	6	4	5	5	5	4
-PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+PREHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
+POSTHOOK: query: explain vectorization expression select a, count(distinct b), count(distinct c), sum(d) from abcd group by a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -187,17 +261,41 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: a (type: int), b (type: int), c (type: int), d (type: int)
              outputColumnNames: a, b, c, d
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3]
              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: a (type: int), b (type: int), c (type: int)
                sort order: +++
                Map-reduce partition columns: a (type: int)
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
               Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               value expressions: d (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0)
@@ -231,10 +329,14 @@ POSTHOOK: Input: default@abcd
 100	1	1	3
 12	1	2	9
 NULL	1	1	6
-PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: query: explain vectorization expression select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -246,15 +348,39 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: a (type: int), b (type: int), c (type: int), d (type: int)
              outputColumnNames: _col1, _col2, _col3, _col4
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3]
              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                sort order: ++++
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
               Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
diff --git ql/src/test/results/clientpositive/vector_count_distinct.q.out ql/src/test/results/clientpositive/vector_count_distinct.q.out
index 22ab392..6f14bc7 100644
--- ql/src/test/results/clientpositive/vector_count_distinct.q.out
+++ ql/src/test/results/clientpositive/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ]
 PREHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
 POSTHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1250,12 +1254,26 @@ STAGE PLANS:
           TableScan
             alias: web_sales
             Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
            Select Operator
              expressions: ws_order_number (type: int)
              outputColumnNames: ws_order_number
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [16]
              Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(DISTINCT ws_order_number)
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 16) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 16
+                   native: false
+                   projectedOutputColumns: [0]
                keys: ws_order_number (type: int)
                mode: hash
                outputColumnNames: _col0, _col1
@@ -1263,8 +1281,25 @@ STAGE PLANS:
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col0:0._col0)
diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out
index 804d9e6..936f483 100644
--- ql/src/test/results/clientpositive/vector_data_types.q.out
+++ ql/src/test/results/clientpositive/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -183,10 +187,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -198,17 +206,41 @@ STAGE PLANS:
           TableScan
             alias: over1korc
             Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
            Select Operator
              expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
              Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
                sort order: +++
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary)
diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
index b022435..6ba47d5 100644
--- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
@@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
 -- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
     COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
     COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
 FROM decimal_vgby
@@ -33,13 +33,17 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
 -- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
    COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
    COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -51,12 +55,26 @@ STAGE PLANS:
           TableScan
             alias: decimal_vgby
             Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
              outputColumnNames: cint, cdecimal1, cdecimal2
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [3, 1, 2]
              Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count()
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 3
+                   native: false
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                keys: cint (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -65,9 +83,26 @@ STAGE PLANS:
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8)
@@ -123,7 +158,7 @@ POSTHOOK: Input: default@decimal_vgby
 762	2	5831542.2692483780	1531.2194054054	5833073.4886537834	2	6984454.21109769200000	1833.94569230769250	6986288.15678999969250
 NULL	3072	9318.4351351351	-4298.1513513514	5018444.1081079808	3072	11160.71538461538500	-5147.90769230769300	6010604.30769230735360
 PREHOOK: query: -- Now add the others...
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
    COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
    COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
 FROM decimal_vgby
@@ -131,13 +166,17 @@ HAVING COUNT(*) > 1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Now add the others...
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
    COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
    COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
 FROM decimal_vgby
 GROUP BY cint
 HAVING COUNT(*) > 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -149,12 +188,27 @@ STAGE PLANS:
           TableScan
             alias: decimal_vgby
            Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
            Select Operator
              expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
              outputColumnNames: cint, cdecimal1, cdecimal2
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [3, 1, 2]
              Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: false
+                   keyExpressions: col 3
+                   native: false
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+                   vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
                keys: cint (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
@@ -166,6 +220,18 @@ STAGE PLANS:
                  Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
diff --git ql/src/test/results/clientpositive/vector_decimal_cast.q.out ql/src/test/results/clientpositive/vector_decimal_cast.q.out
index aee5e02..8fb516a 100644
--- ql/src/test/results/clientpositive/vector_decimal_cast.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_cast.q.out
@@ -1,7 +1,11 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -13,24 +17,50 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean
              predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15]
+                   selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0)
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 10
+                 Limit Vectorization:
+                     className: VectorLimitOperator
+                     native: true
                  Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/vector_decimal_expressions.q.out
index 8af5a1c..9afecca 100644
--- ql/src/test/results/clientpositive/vector_decimal_expressions.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_expressions.q.out
@@ -15,14 +15,18 @@ POSTHOOK: Output: default@decimal_test
 POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL
 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -34,19 +38,48 @@ STAGE PLANS:
           TableScan
             alias: decimal_test
            Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1, val 0) -> boolean, FilterDecimalColLessDecimalScalar(col 1, val 12345.5678) -> boolean, FilterDecimalColNotEqualDecimalScalar(col 2, val 0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 2, val 1000) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
              predicate: ((cdecimal1 > 0) and (cdecimal1 < 12345.5678) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean)
              Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,23)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,28)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+                   selectExpressions: DecimalColAddDecimalColumn(col 1, col 2) -> 3:decimal(25,14), DecimalColSubtractDecimalColumn(col 1, col 4)(children: DecimalScalarMultiplyDecimalColumn(val 2, col 2) -> 4:decimal(25,14)) -> 5:decimal(26,14), DecimalColDivideDecimalColumn(col 6, col 2)(children: DecimalColAddDecimalScalar(col 1, val 2.34) -> 6:decimal(21,10)) -> 7:decimal(38,23), DecimalColMultiplyDecimalColumn(col 1, col 8)(children: DecimalColDivideDecimalScalar(col 2, val 3.4) -> 8:decimal(28,18)) -> 9:decimal(38,28), DecimalColModuloDecimalScalar(col 1, val 10) -> 10:decimal(12,10), CastDecimalToLong(col 1) -> 11:long, CastDecimalToLong(col 2) -> 12:long, CastDecimalToLong(col 2) -> 13:long, CastDecimalToLong(col 1) -> 14:long, CastDecimalToBoolean(col 1) -> 15:long, CastDecimalToDouble(col 2) -> 16:double, CastDecimalToDouble(col 1) -> 17:double, CastDecimalToString(col 2) -> 18:String, CastDecimalToTimestamp(col 1) -> 19:timestamp
                Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,23)), _col3 (type: decimal(38,28)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp)
                  sort order: ++++++++++++++
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,23)), KEY.reducesinkkey3 (type: decimal(38,28)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp)
diff --git ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
index c69b7af..2991cb0 100644
--- ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
@@ -72,12 +72,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k
 POSTHOOK: Output: default@t2
 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -113,12 +117,23 @@ STAGE PLANS:
           TableScan
             alias: t1
            Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean
              predicate: dec is not null (type: boolean)
              Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: dec (type: decimal(4,2))
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
@@ -126,16 +141,33 @@ STAGE PLANS:
                  keys:
                    0 _col0 (type: decimal(6,2))
                    1 _col0 (type: decimal(6,2))
+                 Map Join Vectorization:
+                     className: VectorMapJoinOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Supports Key Types IS false
+                     nativeNotSupportedKeyTypes: DECIMAL
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out index bce8f4f..7d314c6 100644 --- ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out @@ -14,7 +14,7 @@ POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdecimal1 ,Round(cdecimal1, 2) @@ -53,7 +53,7 @@ and sin(cdecimal1) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdecimal1 ,Round(cdecimal1, 2) @@ -90,6 +90,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cdecimal1) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -101,21 +105,44 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 4, val 0)(children: LongColModuloLongScalar(col 0, val 500) -> 4:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 6, val -1.0)(children: FuncSinDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean) Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, 
_col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 7, 8, 9, 10, 5, 11, 12, 13, 15, 6, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 2, 28, 4, 29] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 2, decimalPlaces 2) -> 7:decimal(13,2), FuncRoundDecimalToDecimal(col 2) -> 8:decimal(11,0), FuncFloorDecimalToDecimal(col 2) -> 9:decimal(11,0), FuncCeilDecimalToDecimal(col 2) -> 10:decimal(11,0), RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 58)(children: FuncExpDoubleToDouble(col 5)(children: CastDecimalToDouble(col 2) -> 5:double) -> 6:double) -> 5:double, FuncLnDoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 11:double, FuncLog10DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 12:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 2) -> 6:double) -> 13:double, FuncLog2DoubleToDouble(col 6)(children: CastDecimalToDouble(col 14)(children: DecimalColSubtractDecimalScalar(col 2, val 15601) -> 14:decimal(21,10)) -> 6:double) -> 15:double, VectorUDFAdaptor(log(2, cdecimal1)) -> 6:Double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 16)(children: CastDecimalToDouble(col 2) -> 16:double) -> 17:double) -> 16:Double, VectorUDFAdaptor(power(log2(cdecimal1), 2))(children: FuncLog2DoubleToDouble(col 17)(children: CastDecimalToDouble(col 2) -> 17:double) -> 18:double) -> 17:Double, FuncSqrtDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 19:double, FuncAbsDecimalToDecimal(col 2) -> 20:decimal(20,10), FuncSinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 21:double, FuncASinDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 22:double, FuncCosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 23:double, FuncACosDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 24:double, FuncATanDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 25:double, FuncDegreesDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 26:double, FuncRadiansDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 27:double, FuncNegateDecimalToDecimal(col 2) -> 28:decimal(20,10), FuncSignDecimalToLong(col 2) -> 4:long, FuncCosDoubleToDouble(col 18)(children: DoubleColAddDoubleScalar(col 29, val 3.14159)(children: DoubleColUnaryMinus(col 18)(children: FuncSinDoubleToDouble(col 29)(children: FuncLnDoubleToDouble(col 18)(children: CastDecimalToDouble(col 2) -> 18:double) -> 29:double) -> 18:double) -> 29:double) -> 18:double) -> 29:double Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 
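
One detail worth pausing on in the vector_decimal_math_funcs plan above: most calls compile to dedicated kernels (FuncSinDoubleToDouble, FuncLog2DoubleToDouble, and so on), but log(2, cdecimal1) and power(log2(cdecimal1), 2) go through VectorUDFAdaptor, which is why the Map Vectorization summary reports usesVectorUDFAdaptor: true. The following is a minimal sketch of the trade-off the adaptor implies — plain Java with hypothetical names, not Hive's actual adaptor code:

import java.util.function.DoubleUnaryOperator;

// Illustrative sketch only: contrasts a dedicated vectorized kernel with a
// per-row adaptor fallback, the trade-off behind usesVectorUDFAdaptor above.
public class AdaptorSketch {

    // Dedicated kernel style: one tight loop over the whole column vector.
    static void log2Kernel(double[] in, double[] out, int n) {
        final double LN2 = Math.log(2.0);
        for (int i = 0; i < n; i++) {
            out[i] = Math.log(in[i]) / LN2;
        }
    }

    // Adaptor style: a generic row-mode function invoked once per row.
    static void adaptorEval(double[] in, double[] out, int n, DoubleUnaryOperator rowUdf) {
        for (int i = 0; i < n; i++) {
            out[i] = rowUdf.applyAsDouble(in[i]); // per-row dispatch, like VectorUDFAdaptor
        }
    }

    public static void main(String[] args) {
        int n = 1024; // the default VectorizedRowBatch size
        double[] in = new double[n];
        double[] out = new double[n];
        for (int i = 0; i < n; i++) {
            in[i] = i + 1.0;
        }
        log2Kernel(in, out, n);
        System.out.println("log2(8) = " + out[7]);            // 3.0
        adaptorEval(in, out, n, x -> Math.pow(Math.log(x) / Math.log(2.0), 2.0));
        System.out.println("power(log2(8), 2) = " + out[7]);  // 9.0
    }
}

Both paths keep data flowing through the batch pipeline; the adaptor simply pays a per-row call, which is why a dedicated kernel is preferred wherever one exists.
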
Fetch Operator diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index c5f9e4d..2f8d0c9 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -545,10 +545,14 @@ NULL NULL 123456789.0123456789 15241578753238836.75019051998750190521 1234567890.1234560000 NULL 1234567890.1234567890 NULL -PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -560,12 +564,26 @@ STAGE PLANS: TableScan alias: decimal_precision Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(20,10)) outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(dec), sum(dec) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE @@ -574,6 +592,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) diff --git ql/src/test/results/clientpositive/vector_decimal_round.q.out ql/src/test/results/clientpositive/vector_decimal_round.q.out index 7dc885e..8fc1f6a 100644 --- ql/src/test/results/clientpositive/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round.q.out @@ -30,12 +30,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order 
by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,6 +60,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -84,12 +96,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -110,6 +126,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -166,12 +190,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -192,6 +220,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -220,12 +256,16 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -246,6 +286,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -302,12 +350,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -319,16 +371,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -357,12 +434,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -374,16 +455,40 @@ STAGE PLANS: TableScan alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) diff --git ql/src/test/results/clientpositive/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/vector_decimal_round_2.q.out index 4924bff..09da361 100644 --- ql/src/test/results/clientpositive/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round_2.q.out @@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), @@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 
3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,16 +73,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_1_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true 
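
A pattern worth noting in the rounded result types above: rounding a decimal(p,s) to n places consistently comes out as decimal(p - s + max(n,0) + 1, max(n,0)). This formula is read off the q.out types in this patch, not quoted from Hive's type-derivation code, and the check below assumes dec in decimal_tbl_1_orc is decimal(30,10), which fits every rounded type in this plan:

// A small check of the result-type pattern visible in these plans:
// round(decimal(p,s), n) -> decimal(p - s + max(n,0) + 1, max(n,0)).
// Inferred from the q.out outputs themselves, not Hive's source.
public class RoundTypeSketch {

    static String roundType(int p, int s, int n) {
        int scale = Math.max(n, 0);
        int precision = p - s + scale + 1; // one extra integer digit for carry
        return "decimal(" + precision + "," + scale + ")";
    }

    public static void main(String[] args) {
        System.out.println(roundType(10, 0, -1));  // decimal(11,0), as in vector_decimal_round
        System.out.println(roundType(20, 10, 2));  // decimal(13,2), as in vector_decimal_math_funcs
        System.out.println(roundType(30, 10, 16)); // decimal(37,16), matching round(dec, 16) here
    }
}
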
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) @@ -144,7 +173,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### 125.315000000000000000 -125.315000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -154,7 +183,7 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -164,6 +193,10 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -175,16 +208,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_2_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: 
decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0)) @@ -255,7 +313,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### 3.141592653589793000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -276,7 +334,7 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -297,6 +355,10 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -308,16 +370,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_3_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 3:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 10:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 11:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 12:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 14:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 15:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 16:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 17:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 18:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 19:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 20:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 21:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 22:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 23:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 24:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 25:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 26:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 27:decimal(37,16), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 28:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 29:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 30:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 31:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 32:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 33:decimal(21,0) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: 
decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16)) @@ -411,14 +498,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_4_orc #### A masked pattern was here #### 1809242.315111134400000000 -1809242.315111134400000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -430,16 +521,41 @@ STAGE PLANS: TableScan alias: decimal_tbl_4_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(pos, 9) (type: decimal(30,9)), 
round(neg, 9) (type: decimal(30,9)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 1fbc05a..f925b57 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@decimal_udf POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,18 +69,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key + key) (type: decimal(21,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColAddDecimalColumn(col 0, col 0) -> 2:decimal(21,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -130,10 +153,14 @@ NULL 2.0000000000 -2469135780.2469135780 2469135780.2469135600 -PREHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,18 +172,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DecimalColAddDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(21,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -210,10 +256,14 @@ NULL 2.0000000000 -2469135780.1234567890 2469135780.1234567800 -PREHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -225,18 +275,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: 
(UDFToDouble(key) + (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColAddDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -290,10 +359,14 @@ NULL 1.5 -1.8518518351234567E9 1.8518518351234567E9 -PREHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key + '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -305,18 +378,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (UDFToDouble(key) + 1.0) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColAddDoubleScalar(col 2, val 1.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -371,11 +463,15 @@ NULL -1.2345678891234567E9 1.2345678911234567E9 PREHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key - key 
FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -387,18 +483,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key - key) (type: decimal(21,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColSubtractDecimalColumn(col 0, col 0) -> 2:decimal(21,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -452,10 +567,14 @@ NULL 0.0000000000 0.0000000000 0.0000000000 -PREHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -467,18 +586,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DecimalColSubtractDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(21,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -532,10 +670,14 @@ 
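The selectExpressions strings in these plans all follow one nested grammar: an expression class name, its input columns, an optional "children" list for the implicit casts the optimizer inserted, and finally the scratch output column with its type. As a reading aid, here is a minimal sketch of that annotation shape; VectorExprAnnotation is a hypothetical helper written for this note, not a class in the patch.

// Hypothetical illustration of the annotation grammar used by selectExpressions /
// predicateExpression above: name(col a, col b)(children: ...) -> outCol:type.
public class VectorExprAnnotation {
  final String name;
  final int[] inputCols;
  final VectorExprAnnotation[] children;
  final int outputCol;
  final String outputType;

  VectorExprAnnotation(String name, int[] inputCols, VectorExprAnnotation[] children,
      int outputCol, String outputType) {
    this.name = name;
    this.inputCols = inputCols;
    this.children = children;
    this.outputCol = outputCol;
    this.outputType = outputType;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder(name).append('(');
    for (int i = 0; i < inputCols.length; i++) {
      if (i > 0) sb.append(", ");
      sb.append("col ").append(inputCols[i]);
    }
    sb.append(')');
    if (children.length > 0) {
      sb.append("(children: ");
      for (int i = 0; i < children.length; i++) {
        if (i > 0) sb.append(", ");
        sb.append(children[i]);
      }
      sb.append(')');
    }
    return sb.append(" -> ").append(outputCol).append(':').append(outputType).toString();
  }

  public static void main(String[] args) {
    // The CAST( value AS decimal(10,0)) in the logical plan surfaces as a
    // CastLongToDecimal child feeding scratch column 2.
    VectorExprAnnotation cast = new VectorExprAnnotation(
        "CastLongToDecimal", new int[] {1}, new VectorExprAnnotation[0], 2, "decimal(10,0)");
    System.out.println(new VectorExprAnnotation(
        "DecimalColSubtractDecimalColumn", new int[] {0, 2},
        new VectorExprAnnotation[] {cast}, 3, "decimal(21,10)"));
    // prints: DecimalColSubtractDecimalColumn(col 0, col 2)(children:
    //         CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(21,10)
  }
}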
NULL 0.0000000000 -0.1234567890 0.1234567800 -PREHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -547,18 +689,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColSubtractDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -612,10 +773,14 @@ NULL 0.5 -6.172839451234567E8 6.172839451234567E8 -PREHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key - '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -627,18 +792,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (UDFToDouble(key) - 1.0) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColSubtractDoubleScalar(col 2, val 1.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -693,11 +877,15 @@ NULL -1.2345678911234567E9 1.2345678891234567E9 PREHOOK: query: -- multiplication -EXPLAIN SELECT key * key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key * key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- multiplication -EXPLAIN SELECT key * key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT key * key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -709,18 +897,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key * key) (type: decimal(38,20)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColMultiplyDecimalColumn(col 0, col 0) -> 2:decimal(38,20) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -774,10 +981,14 @@ NULL 1.00000000000000000000 NULL NULL -PREHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key, value FROM DECIMAL_UDF where key * value > 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key, value FROM DECIMAL_UDF where key * value > 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -789,21 +1000,43 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColGreaterDecimalScalar(col 3, val 0)(children: DecimalColMultiplyDecimalColumn(col 0, col 2)(children: 
CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,10)) -> boolean predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -842,10 +1075,14 @@ POSTHOOK: Input: default@decimal_udf 1.0000000000 1 -1234567890.1234567890 -1234567890 1234567890.1234567800 1234567890 -PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -857,18 +1094,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DecimalColMultiplyDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -922,10 +1178,14 @@ NULL 1.0000000000 1524157875171467887.5019052100 1524157875171467876.3907942000 -PREHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 
key * (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -937,18 +1197,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColMultiplyDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1002,10 +1281,14 @@ NULL 0.5 7.6207893758573389E17 7.6207893758573389E17 -PREHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key * '2.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1017,18 +1300,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (UDFToDouble(key) * 2.0) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColMultiplyDoubleScalar(col 2, val 2.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution 
mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1083,11 +1385,15 @@ NULL -2.4691357802469134E9 2.4691357802469134E9 PREHOOK: query: -- division -EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +EXPLAIN VECTORIZATION EXPRESSION SELECT key / 0 FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- division -EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +EXPLAIN VECTORIZATION EXPRESSION SELECT key / 0 FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1099,21 +1405,43 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (key / 0) (type: decimal(28,18)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColDivideDecimalScalar(col 0, val 0) -> 2:decimal(28,18) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1130,10 +1458,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / NULL FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / NULL FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1159,6 +1491,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for 
SELECT operator: org.apache.hadoop.hive.ql.metadata.HiveException: Could not instantiate DoubleColDivideDoubleScalar with arguments arguments: [0, ConstantVectorExpression(val null) -> 1:double, 2], argument classes: [Integer, ConstantVectorExpression, Integer], exception: java.lang.IllegalArgumentException stack trace: sun.reflect.GeneratedConstructorAccessor.newInstance(Unknown Source), sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45), java.lang.reflect.Constructor.newInstance(Constructor.java:526), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.instantiateExpression(VectorizationContext.java:1335), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.createVectorExpression(VectorizationContext.java:1233), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressionForUdf(VectorizationContext.java:1198), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUdfVectorExpression(VectorizationContext.java:1400), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:542), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2102), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateExprNodeDesc(Vectorizer.java:2092), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateSelectOperator(Vectorizer.java:1812), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateMapWorkOperator(Vectorizer.java:1647), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$MapWorkValidationNodeProcessor.process(Vectorizer.java:1282), org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90), org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105), ... 
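The notVectorizedReason above is the one place in this file where vectorization is refused outright. SELECT key / NULL types the divisor as a null constant; VectorizationContext.instantiateExpression then tries to construct DoubleColDivideDoubleScalar reflectively with arguments [Integer, ConstantVectorExpression, Integer], no constructor matches, Constructor.newInstance throws IllegalArgumentException, and the whole map task falls back to row mode (vectorized: false below). A minimal, self-contained illustration of that reflection failure mode; ColDivideScalar is a stand-in class invented for this note, not Hive's.

import java.lang.reflect.Constructor;

public class ReflectionMismatchDemo {
  // Stand-in for a generated expression whose constructor wants a primitive scalar.
  static class ColDivideScalar {
    ColDivideScalar(int inputColumn, double scalar, int outputColumn) {
    }
  }

  public static void main(String[] args) throws Exception {
    Constructor<?> ctor =
        ColDivideScalar.class.getDeclaredConstructor(int.class, double.class, int.class);
    try {
      // Passing an expression object where the double scalar belongs mirrors the
      // argument list recorded in the plan: [Integer, ConstantVectorExpression, Integer].
      ctor.newInstance(0, new Object(), 2);
    } catch (IllegalArgumentException e) {
      // java.lang.IllegalArgumentException: argument type mismatch
      System.out.println(e);
    }
  }
}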
+ vectorized: false Stage: Stage-0 Fetch Operator @@ -1175,10 +1513,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1190,21 +1532,44 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColNotEqualDecimalScalar(col 0, val 0) -> boolean predicate: (key <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / key) (type: decimal(38,24)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DecimalColDivideDecimalColumn(col 0, col 0) -> 2:decimal(38,24) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1254,10 +1619,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000000 1.000000000000000000000000 1.000000000000000000000000 -PREHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1269,21 +1638,44 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator + native: true + predicateExpression: FilterLongColNotEqualLongScalar(col 1, val 0) -> boolean predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DecimalColDivideDecimalColumn(col 0, col 2)(children: CastLongToDecimal(col 1) -> 2:decimal(10,0)) -> 3:decimal(31,21) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1323,10 +1715,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000 1.000000000100000000000 1.000000000099999992710 -PREHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1338,21 +1734,44 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColNotEqualLongScalar(col 1, val 0) -> boolean predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: DoubleColDivideDoubleColumn(col 2, col 4)(children: CastDecimalToDouble(col 0) -> 2:double, DoubleColDivideDoubleScalar(col 3, val 2.0)(children: CastLongToDouble(col 1) -> 3:double) -> 4:double) -> 3:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1392,10 +1811,14 @@ POSTHOOK: Input: default@decimal_udf 2.0 2.0000000002 2.0000000002 -PREHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1407,18 +1830,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (1.0 + (UDFToDouble(key) / 2.0)) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: DoubleScalarAddDoubleColumn(val 1.0, col 3)(children: DoubleColDivideDoubleScalar(col 2, val 2.0)(children: CastDecimalToDouble(col 0) -> 2:double) -> 3:double) -> 2:double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1473,11 +1915,15 @@ NULL -6.172839440617284E8 6.172839460617284E8 PREHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1489,18 +1935,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: abs(key) (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [2] + selectExpressions: FuncAbsDecimalToDecimal(col 0) -> 2:decimal(20,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1555,11 +2020,15 @@ NULL 1234567890.1234567890 1234567890.1234567800 PREHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY POSTHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1572,12 +2041,26 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(key), count(key), avg(key) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0) -> decimal(38,18), VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1589,6 +2072,18 @@ STAGE PLANS: Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By 
Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) @@ -1616,6 +2111,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(38,23)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,23)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) @@ -1661,11 +2164,15 @@ POSTHOOK: Input: default@decimal_udf 4400 -4400.00000000000000000000000 -4400.00000000000000 -4400.0000000000 1234567890 1234567890.12345678000000000000000 1234567890.12345678000000 1234567890.1234567800 PREHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1677,18 +2184,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncNegateDecimalToDecimal(col 0) -> 2:decimal(20,10) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1743,27 +2269,62 @@ NULL 1234567890.1234567890 -1234567890.1234567800 PREHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator 
Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1894,11 +2455,15 @@ NULL -1234567890 1234567891 PREHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1910,18 +2475,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncFloorDecimalToDecimal(col 0) -> 2:decimal(11,0) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1976,11 +2560,15 @@ NULL -1234567891 1234567890 PREHOOK: 
query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1992,18 +2580,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 2:decimal(13,2) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -2058,11 +2665,15 @@ NULL -1234567890.12 1234567890.12 PREHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2074,18 +2685,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: VectorUDFAdaptor(power(key, 2)) -> 2:Double Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -2140,11 +2770,15 @@ NULL 1.52415787532388352E18 1.52415787532388352E18 PREHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2156,18 +2790,37 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ((key + 1) % (key / 2)) (type: decimal(28,18)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + selectExpressions: DecimalColModuloDecimalColumn(col 2, col 3)(children: DecimalColAddDecimalScalar(col 0, val 1) -> 2:decimal(21,10), DecimalColDivideDecimalScalar(col 0, val 2) -> 3:decimal(28,18)) -> 4:decimal(28,18) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -2222,11 +2875,15 @@ NULL -617283944.061728394500000000 1.000000000000000000 PREHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2238,12 +2895,26 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
stddev(key), variance(key) + Group By Vectorization: + aggregators: VectorUDAFStdPopDecimal(col 0) -> struct, VectorUDAFVarPopDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2255,6 +2926,18 @@ STAGE PLANS: Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev(VALUE._col0), variance(VALUE._col1) @@ -2302,11 +2985,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION EXPRESSION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2318,12 +3005,26 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(key), var_samp(key) + Group By Vectorization: + aggregators: VectorUDAFStdSampDecimal(col 0) -> struct, VectorUDAFVarSampDecimal(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2335,6 +3036,18 @@ STAGE PLANS: Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) @@ -2382,11 +3095,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2411,6 +3128,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: array) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: histogram_numeric(VALUE._col0) @@ -2441,11 +3168,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### [{"x":-1.2345678901234567E9,"y":1.0},{"x":-144.50057142857142,"y":35.0},{"x":1.2345678901234567E9,"y":1.0}] PREHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2457,20 +3188,49 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -2501,11 +3261,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1234567890.1234567890 PREHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2517,20 +3281,49 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(key) + Group By Vectorization: + aggregators: VectorUDAFMaxDecimal(col 0) -> decimal(20,10) + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false 
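The split these plans keep recording -- a map-side Group By in hash mode feeding a reduce-side Group By over VALUE._col0 -- works because MIN is decomposable: the minimum of per-mapper partial minimums is the global minimum. A minimal standalone sketch of that two-phase idea (plain HashMaps and int keys as stand-ins; the names are illustrative, not Hive's API):

import java.util.HashMap;
import java.util.Map;

public final class TwoPhaseMinSketch {
  // Map side: one partial minimum per group key, as a hash-mode Group By would keep.
  static Map<Integer, Integer> partialMin(int[] keys, int[] values) {
    Map<Integer, Integer> partials = new HashMap<>();
    for (int i = 0; i < keys.length; i++) {
      partials.merge(keys[i], values[i], Math::min);
    }
    return partials;
  }

  // Reduce side: merging partials reuses the same min, so the split is lossless.
  static Map<Integer, Integer> merge(Map<Integer, Integer> a, Map<Integer, Integer> b) {
    Map<Integer, Integer> out = new HashMap<>(a);
    b.forEach((k, v) -> out.merge(k, v, Math::min));
    return out;
  }

  public static void main(String[] args) {
    Map<Integer, Integer> mapper1 = partialMin(new int[] {1, 1, 2}, new int[] {5, 3, 9});
    Map<Integer, Integer> mapper2 = partialMin(new int[] {1, 2}, new int[] {4, 2});
    System.out.println(merge(mapper1, mapper2)); // {1=3, 2=2}
  }
}

The same decomposition holds for the MAX and COUNT plans in this file (summing partial counts), which is why all three share the hash/merge shape.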
Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -2561,11 +3354,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 1234567890.1234567800 PREHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2577,20 +3374,49 @@ STAGE PLANS: TableScan alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(key) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 4e24fa6..d75a9f1 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,21 +71,44 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8] + selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -100,20 +127,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,21 +156,44 @@ STAGE PLANS: TableScan alias: decimal_udf2 Statistics: Num rows: 38 Data size: 
4296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:Double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index dba2e6e..3685242 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,11 +126,24 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -135,8 +152,25 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_elt.q.out ql/src/test/results/clientpositive/vector_elt.q.out index 08ca167..233255a 100644 --- ql/src/test/results/clientpositive/vector_elt.q.out +++ ql/src/test/results/clientpositive/vector_elt.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -17,24 +21,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (ctinyint > 0) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE 
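A comparison filter like the FilterLongColGreaterLongScalar recorded above can be fully native because it moves no data: it only rewrites the batch's selection vector, so every later operator (Select, Limit, the file sink) scans fewer rows of the same arrays. A standalone sketch of that in-place compaction, under an assumed values/selected batch layout rather than Hive's actual VectorizedRowBatch API:

public final class FilterGreaterScalarSketch {
  // Keeps only rows where values[row] > scalar; returns the surviving row count.
  public static int filter(long[] values, long scalar,
                           int[] selected, boolean selectedInUse, int size) {
    int newSize = 0;
    for (int j = 0; j < size; j++) {
      int row = selectedInUse ? selected[j] : j;
      if (values[row] > scalar) {
        selected[newSize++] = row; // compact qualifying row ids in place
      }
    }
    return newSize;
  }

  public static void main(String[] args) {
    long[] ctinyint = {4, -1, 0, 9};
    int[] selected = new int[ctinyint.length];
    int size = filter(ctinyint, 0L, selected, false, ctinyint.length);
    for (int j = 0; j < size; j++) {
      System.out.println("row " + selected[j]); // prints row 0 and row 3
    }
  }
}

Because newSize can never outrun j, overwriting selected[] while reading it is safe even when a previous filter already populated it.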
Select Operator expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 6, 2, 16] + selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -64,7 +94,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -77,7 +107,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -90,25 +120,64 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + selectExpressions: 
ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string + Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 8f694da..4f60048 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- HIVE- -explain +explain vectorization expression select count (distinct cint) from alltypesorc where cstring1 PREHOOK: type: QUERY POSTHOOK: query: -- HIVE- -explain +explain vectorization expression select count (distinct cint) from alltypesorc where cstring1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -17,15 +21,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 
13)(children: CastLongToBooleanViaLongToLong(col 12)(children: StringLength(col 6) -> 12:Long) -> 13:long) -> boolean predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -33,8 +55,25 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -64,12 +103,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6041 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,11 +124,25 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastLongToBooleanViaLongToLong(col 2) -> 12:long) -> boolean predicate: cint (type: int) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ 
-93,8 +150,25 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -124,12 +198,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6082 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cfloat PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,15 +219,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastDoubleToBooleanViaDoubleToLong(col 4) -> 12:long) -> boolean predicate: cfloat (type: float) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -157,8 +253,25 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false 
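The predicate expressions recorded in this file document how a bare, non-boolean WHERE clause is coerced: WHERE cint becomes SelectColumnIsTrue over CastLongToBooleanViaLongToLong, i.e. "keep rows where the value is nonzero". A standalone sketch of those two steps (class and method names here are illustrative, not Hive's):

public final class TruthyFilterSketch {
  // Step 1, the cast: 0 stays 0 (false), any other value becomes 1 (true).
  static void castLongToBoolean(long[] in, long[] out, int size) {
    for (int i = 0; i < size; i++) {
      out[i] = (in[i] == 0) ? 0 : 1;
    }
  }

  // Step 2, the selection: compact the rows whose boolean-as-long column is 1.
  static int selectIsTrue(long[] bools, int[] selected, int size) {
    int newSize = 0;
    for (int i = 0; i < size; i++) {
      if (bools[i] == 1) {
        selected[newSize++] = i;
      }
    }
    return newSize;
  }

  public static void main(String[] args) {
    long[] cint = {528534767, 0, -7};
    long[] bools = new long[cint.length];
    int[] selected = new int[cint.length];
    castLongToBoolean(cint, bools, cint.length);
    int size = selectIsTrue(bools, selected, cint.length);
    System.out.println(size + " rows pass"); // 2 rows pass (indices 0 and 2)
  }
}

The WHERE cstring1 variant above differs only in step 1, which goes through StringLength first, making the empty string the falsy case.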
Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -188,12 +301,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 3022 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count (distinct cint) from alltypesorc where ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -205,15 +322,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 12)(children: CastTimestampToBoolean(col 8) -> 12:long) -> boolean predicate: ctimestamp1 (type: timestamp) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false + projectedOutputColumns: [0] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -221,8 +356,25 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index 8041511..37e0a38 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -43,16 +47,41 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 0, start 0, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -75,6 +104,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: 
KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index 63fe8f3..5fb4b00 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -43,16 +47,41 @@ STAGE PLANS: TableScan alias: srcorc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + selectExpressions: StringSubstrColStartLen(col 1, start 4, length 1) -> 2:string Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -75,6 +104,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out 
index 38eeeef..e3425db 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,12 +126,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 8 + native: false + projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -136,9 +154,26 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 3468657..1b3abe3 100644 --- 
ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization expression select * from src where not key in @@ -8,13 +8,17 @@ where not key in order by key PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization expression select * from src where not key in (select key from src) order by key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-8 depends on stages: Stage-4 @@ -42,6 +46,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -127,8 +139,16 @@ STAGE PLANS: sort order: + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 9e0abd4..d47a30e 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -238,11 +242,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -251,9 +268,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -315,7 +349,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -327,7 +361,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select min(ss_ticket_number) m from @@ -339,6 +373,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -351,11 +389,24 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9 + native: false + projectedOutputColumns: [] keys: ss_ticket_number (type: int) 
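The VectorUDAFMinLong / VectorUDAFMinDecimal aggregators named throughout these plans fold an entire batch per call instead of evaluating one row at a time. A minimal standalone sketch of that fold over an assumed values/selected batch layout (longs stand in for the real column types; this is not Hive's actual class):

public final class BatchMinSketch {
  private long min = Long.MAX_VALUE;
  private boolean sawValue = false;

  // Folds one batch into the running minimum, honoring the selection vector.
  public void aggregate(long[] values, int[] selected, boolean selectedInUse, int size) {
    for (int j = 0; j < size; j++) {
      long v = values[selectedInUse ? selected[j] : j];
      if (!sawValue || v < min) {
        min = v;
        sawValue = true;
      }
    }
  }

  // A primitive result is what keeps vectorOutput: true in plans like these,
  // unlike the variance family, whose struct intermediate fails the PRIMITIVE check.
  public long terminate() {
    if (!sawValue) {
      throw new IllegalStateException("no rows aggregated");
    }
    return min;
  }

  public static void main(String[] args) {
    BatchMinSketch agg = new BatchMinSketch();
    agg.aggregate(new long[] {7, -3, 12, 5}, new int[] {0, 2, 3}, true, 3);
    System.out.println(agg.terminate()); // 5: row 1 is excluded by the selection vector
  }
}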
mode: hash outputColumnNames: _col0 @@ -364,8 +415,25 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -397,6 +465,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -524,7 +600,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -536,7 +612,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -548,6 +624,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -560,12 +640,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk 
(type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -574,9 +668,26 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -610,6 +721,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -737,7 +856,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -749,7 +868,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ss_ticket_number, ss_item_sk, sum(q) from @@ -761,6 +880,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -773,12 +896,26 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 2, 10] Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By 
Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 9, col 2 + native: false + projectedOutputColumns: [0] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -787,9 +924,26 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -823,6 +977,14 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 4207c19..cb61b18 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -127,16 +127,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -162,6 +166,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -202,16 +216,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -237,6 +255,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out index 0a82be9..73cbcdf 100644 --- ql/src/test/results/clientpositive/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,19 +19,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, 
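The vector_grouping_sets.q.out hunks above are the one negative case in this batch: the map side stays in row mode (`vectorized: false`) and the output says why via `notVectorizedReason: GROUPBY operator: Grouping sets not supported`. The triggering query, verbatim from the test:

    explain vectorization expression
    select s_store_id
    from store
    group by s_store_id with rollup;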
SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 12] + selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 697d422..176301e 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -181,16 +181,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[15][bigTable=store_sales] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
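In the vector_if_expr.q.out hunk above, `IF (cboolean1, 'first', 'second')` compiles to the specialized `IfExprStringScalarStringScalar(col 10, val first, val second)` kernel rather than falling back to an adaptor, which is why the map summary can report `usesVectorUDFAdaptor: false`. Verbatim:

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cboolean1, IF (cboolean1, 'first', 'second')
    FROM alltypesorc
    WHERE cboolean1 IS NOT NULL AND cboolean1
    ORDER BY cboolean1;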
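One detail worth flagging in the vector_include_no_sel.q.out plan continuing below: its Reduce Sink reports `nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false`, unlike the other tests, so the annotations reflect live session state rather than a fixed checklist. A trimmed sketch (the real test predicate ORs two `cd_marital_status` filters; the `set` line is an assumption matching the NotMet entry):

    set hive.vectorized.execution.reducesink.new.enabled=false;
    explain vectorization expression
    select count(1)
    from customer_demographics, store_sales
    where customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk;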
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -219,31 +223,70 @@ STAGE PLANS: TableScan alias: store_sales Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Not empty key IS false outputColumnNames: _col0, _col2, _col16 Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val M) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0, col 2) -> boolean, FilterStringGroupColEqualStringScalar(col 1, val U) -> boolean) -> boolean) -> boolean predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 3:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git 
ql/src/test/results/clientpositive/vector_inner_join.q.out ql/src/test/results/clientpositive/vector_inner_join.q.out index e3e0cf4..bc468bf 100644 --- ql/src/test/results/clientpositive/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -73,12 +77,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,20 +101,45 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: 
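vector_inner_join.q.out above switches to the most verbose modifier: `explain vectorization detail` adds a per-vertex rowBatchContext (dataColumnCount, includeColumns, dataColumns, partitionColumnCount, and any scratchColumnTypeNames) on top of what `expression` prints. Verbatim from the test:

    explain vectorization detail
    select t1.a
    from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c
    where t1.a > 2;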
Map Reduce Local Work @@ -120,12 +160,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -166,12 +210,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -179,16 +234,37 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: c:int + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -245,12 +321,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: 
query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -286,12 +366,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -299,20 +390,46 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -333,12 +450,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -374,12 +495,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -387,16 +519,38 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -417,12 +571,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -458,12 +616,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -471,20 +640,47 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 4, 5, 1] + selectExpressions: LongColMultiplyLongScalar(col 3, val 2) -> 4:long, LongColMultiplyLongScalar(col 0, val 5) -> 5:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -505,12 +701,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -546,12 +746,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: 
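The `t1.a*2, t2.c*5` variant above shows where extra output vectors come from: each multiply writes into a fresh column (`LongColMultiplyLongScalar(col 3, val 2) -> 4:long`, `LongColMultiplyLongScalar(col 0, val 5) -> 5:long`), which then appear in `projectedOutputColumns: [2, 4, 5, 1]`. Verbatim:

    explain vectorization detail
    select t1.v1, t1.a*2, t2.c*5, t2.v2
    from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c
    where t1.a > 2;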
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -559,20 +770,46 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -593,12 +830,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -634,12 +875,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -647,20 +899,46 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -681,12 +959,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -722,12 +1004,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] 
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -735,20 +1028,46 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -769,12 +1088,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -810,12 +1133,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -823,20 +1157,46 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 
(type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_interval_1.q.out ql/src/test/results/clientpositive/vector_interval_1.q.out index 373a6de..d6b957f 100644 --- ql/src/test/results/clientpositive/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/vector_interval_1.q.out @@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] PREHOOK: query: -- constants/cast from string -explain +explain vectorization expression select str1, interval '1-2' year to month, interval_year_month(str1), @@ -47,13 +47,17 @@ select from vector_interval_1 order by str1 PREHOOK: type: QUERY POSTHOOK: query: -- constants/cast from string -explain +explain vectorization expression select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,16 +69,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) outputColumnNames: _col0, _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 4, 5] + selectExpressions: 
CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time) @@ -113,7 +142,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: -- interval arithmetic -explain +explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -125,7 +154,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- interval arithmetic -explain +explain vectorization expression select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -136,6 +165,10 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,16 +180,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, 
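vector_interval_1.q.out exercises the interval kernels: casts from string (`interval_year_month(str1)` becomes `CastStringToIntervalYearMonth(col 2)`) in the hunk above, and year-month arithmetic (`IntervalYearMonthColAddIntervalYearMonthColumn` and friends) in the hunk continuing below. Verbatim, the first of these test queries:

    explain vectorization expression
    select str1,
      interval '1-2' year to month,
      interval_year_month(str1),
      interval '1 2:3:4' day to second,
      interval_day_time(str2)
    from vector_interval_1 order by str1;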
IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month) @@ -202,7 +260,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -213,7 +271,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -224,6 +282,10 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -235,16 +297,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: 
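The day-to-second arithmetic plan continuing below pairs each operator shape with a dedicated kernel (`IntervalDayTimeColAddIntervalDayTimeColumn`, `IntervalDayTimeScalarSubtractIntervalDayTimeColumn`, and so on). A trimmed sketch of the test query:

    explain vectorization expression
    select dt,
      interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
      interval '1 2:3:4' day to second - interval_day_time(str2)
    from vector_interval_1 order by dt;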
interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 6, 5, 8, 7] + selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) @@ -291,7 +378,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: -- date-interval arithmetic -explain +explain vectorization expression select dt, dt + interval '1-2' year to month, @@ -309,7 +396,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- date-interval arithmetic -explain +explain vectorization expression select dt, dt + interval '1-2' year to month, @@ -326,6 +413,10 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -337,16 +428,41 @@ STAGE PLANS: TableScan alias: vector_interval_1 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17] + selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 7:long, IntervalYearMonthColAddDateColumn(col 5, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 9:long, DateColSubtractIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) Execution 
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
@@ -405,7 +521,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56
 PREHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -423,7 +539,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -440,6 +556,10 @@ select
   ts - interval_day_time(str2)
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -451,16 +571,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_1
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+                  selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) -> 14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: timestamp)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
@@ -519,7 +664,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59
 PREHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -528,7 +673,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -536,6 +681,10 @@ select
   ts - timestamp '2001-01-01 01:02:03'
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -547,16 +696,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_1
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 4, 5, 6]
+                  selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: timestamp)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
@@ -597,7 +771,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -606,7 +780,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -614,6 +788,10 @@ select
   dt - date '2001-01-01'
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -625,16 +803,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_1
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 4, 5, 6]
+                  selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
@@ -675,7 +878,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,
@@ -687,7 +890,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,
@@ -698,6 +901,10 @@ select
   date '2001-01-01' - ts
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -709,16 +916,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_1
             Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9]
+                  selectExpressions: TimestampColSubtractDateColumn(col 0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val 2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time, TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) -> 6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) -> 7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01 01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val 2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
diff --git ql/src/test/results/clientpositive/vector_interval_2.q.out ql/src/test/results/clientpositive/vector_interval_2.q.out
index 7f40f10..bc1cfbe 100644
--- ql/src/test/results/clientpositive/vector_interval_2.q.out
+++ ql/src/test/results/clientpositive/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -121,16 +125,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Select Operator
               expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+                  selectExpressions: LongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 12:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 13:long, LongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 14:long, LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 15:long, IntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 16:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 17:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 18:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 19:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 21:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 22:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 23:long, IntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 24:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 25:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 26:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 27:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 28:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 29:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 30:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 31:long
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
@@ -218,7 +247,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 true true true true true true true true true true true true true true true true true true true true true true true true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -244,7 +273,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -270,6 +299,10 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -281,16 +314,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Select Operator
               expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean)
               outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+                  selectExpressions: LongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 9:long, IntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 10:long, IntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 11:long, IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 12:long, IntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> 13:long, IntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 14:long, IntervalYearMonthScalarLessIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 15:long, LongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 16:long, LongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 17:long, LongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 18:long, LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 19:long, IntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 20:long, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 21:long, IntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 22:long
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
@@ -366,7 +424,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 false false false false false false false false false false false false false false false false false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -398,7 +456,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -430,6 +488,10 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -441,16 +503,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Select Operator
               expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+                  selectExpressions: IntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 9:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 10:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 11:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 12:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 13:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 14:long, IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 15:long, IntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 16:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 19:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 22:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 23:long, IntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 24:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 25:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 26:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 27:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 28:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 29:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 30:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 31:long
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
@@ -538,7 +625,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -564,7 +651,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str3)
 from vector_interval_2 order by str3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -590,6 +677,10 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str3)
 from vector_interval_2 order by str3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -601,16 +692,41 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Select Operator
               expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean)
               outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+                  selectExpressions: IntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 8:long, IntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 9:long, IntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 10:long, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 11:long, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 12:long, IntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> 13:long, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 14:long, IntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 15:long, IntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 16:long, IntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> 17:long, IntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 18:long, IntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> 19:long, IntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 20:long, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 21:long, IntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> 22:long
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
@@ -687,7 +803,7 @@ POSTHOOK: Input: default@vector_interval_2
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1 2:3:4 false false false false false false false false false false false false false false false false false false
 PREHOOK: query: -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -713,7 +829,7 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -738,6 +854,10 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -749,18 +869,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColNotEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> boolean, FilterLongColGreaterEqualLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterLongColGreaterLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> boolean, FilterIntervalYearMonthColEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColLessIntervalYearMonthScalar(col 6, val 15)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthColGreaterIntervalYearMonthScalar(col 6, val 14)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarLessIntervalYearMonthColumn(val 14, col 6)(children: CastStringToIntervalYearMonth(col 3) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean, FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn(val 15, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> boolean) -> boolean
               predicate: ((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean)
               Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ts (type: timestamp)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: timestamp)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -833,7 +981,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -858,7 +1006,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -883,6 +1031,10 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -858,7 +1006,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -883,6 +1031,10 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -894,18 +1046,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterIntervalDayTimeColEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time, CastStringToIntervalDayTime(col 5) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeColumn(col 6, col 7)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time, CastStringToIntervalDayTime(col 4) -> 7:interval_day_time) -> boolean, FilterIntervalDayTimeColEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColLessIntervalDayTimeScalar(col 6, val 1 02:03:05.000000000)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeColGreaterIntervalDayTimeScalar(col 6, val 1 02:03:04.000000000)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarLessIntervalDayTimeColumn(val 1 02:03:04.000000000, col 6)(children: CastStringToIntervalDayTime(col 5) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean, FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn(val 1 02:03:05.000000000, col 6)(children: CastStringToIntervalDayTime(col 4) -> 6:interval_day_time) -> boolean) -> boolean
              predicate: ((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean)
              Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ts (type: timestamp)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: timestamp)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -978,7 +1158,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
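The day-time filters above likewise reduce to primitive comparisons because a day-time interval normalizes to total seconds plus a nanosecond remainder. A small sketch of that arithmetic, under the assumption that this is how the scalar literals in the plan are normalized (the code is illustrative, not Hive's):

    // Sketch: "1 2:3:4" day-to-second as total seconds; nanos would be a
    // second component compared only when the seconds are equal.
    public final class DayTimeSketch {
        public static void main(String[] args) {
            long a = 1 * 86400L + 2 * 3600 + 3 * 60 + 4; // '1 2:3:4' -> 93784 s
            long b = 1 * 86400L + 2 * 3600 + 3 * 60 + 5; // '1 2:3:5' -> 93785 s
            System.out.println(a < b);  // matches the <= / < filters above
            System.out.println(a != b); // matches the <> filters above
        }
    }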
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -998,7 +1178,7 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -1018,6 +1198,10 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1029,18 +1213,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColLessEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 7)(children: DateColAddIntervalYearMonthColumn(col 1, col 6)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month) -> 7:long) -> boolean, FilterDateScalarEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateScalarLessEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateScalarGreaterEqualDateColumn(val 11747, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColLessEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterDateColGreaterEqualDateScalar(col 7, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean, FilterLongColNotEqualLongColumn(col 1, col 7)(children: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 7:long) -> boolean) -> boolean
              predicate: ((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 = (dt + 1-2)) and (2002-03-01 <= (dt + 1-2)) and (2002-03-01 >= (dt + 1-2)) and ((dt + 1-2) = 2002-03-01) and ((dt + 1-2) <= 2002-03-01) and ((dt + 1-2) >= 2002-03-01) and (dt <> (dt + 1-2))) (type: boolean)
              Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ts (type: timestamp)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: timestamp)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -1103,7 +1315,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
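Note how the plan above renders the literal date '2002-03-01' as val 11747: DATE values reach the vectorized filters as days since the Unix epoch. A self-contained JDK check of that number (a sketch of the representation, not Hive's code):

    import java.time.LocalDate;

    // Sketch: the epoch-day encoding behind FilterDateScalarEqualDateColumn(val 11747, ...).
    public final class DateEpochDaySketch {
        public static void main(String[] args) {
            System.out.println(LocalDate.of(2002, 3, 1).toEpochDay()); // prints 11747
        }
    }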
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -1128,7 +1340,7 @@ where
   and ts > ts - interval '1' year
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month
@@ -1153,6 +1365,10 @@ where
   and ts > ts - interval '1' year
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1164,18 +1380,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2002-03-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2002-02-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2002-04-01 01:02:03.0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2002-03-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2002-02-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2002-04-01 01:02:03.0)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 0-0) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-0) -> 6:timestamp) -> boolean) -> boolean
              predicate: ((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2)) and (2002-03-01 01:02:03.0 >= (ts + 1-2)) and (2002-04-01 01:02:03.0 <> (ts + 1-2)) and (2002-02-01 01:02:03.0 < (ts + 1-2)) and (2002-04-01 01:02:03.0 > (ts + 1-2)) and ((ts + 1-2) = 2002-03-01 01:02:03.0) and ((ts + 1-2) >= 2002-03-01 01:02:03.0) and ((ts + 1-2) <= 2002-03-01 01:02:03.0) and ((ts + 1-2) <> 2002-04-01 01:02:03.0) and ((ts + 1-2) > 2002-02-01 01:02:03.0) and ((ts + 1-2) < 2002-04-01 01:02:03.0) and (ts = (ts + 0-0)) and (ts <> (ts + 1-0)) and (ts <= (ts + 1-0)) and (ts < (ts + 1-0)) and (ts >= (ts - 1-0)) and (ts > (ts - 1-0))) (type: boolean)
              Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ts (type: timestamp)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: timestamp)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -1249,7 +1493,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
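The constants in the predicate above come from the optimizer folding the interval into the literal timestamp before vectorization: 2001-01-01 01:02:03 plus interval '1-2' (14 months) is exactly the 2002-03-01 01:02:03.0 scalar the filters compare against. A one-line JDK verification of that month arithmetic (a sketch, not Hive's folding code):

    import java.time.LocalDateTime;

    public final class TimestampPlusYearMonthSketch {
        public static void main(String[] args) {
            LocalDateTime ts = LocalDateTime.of(2001, 1, 1, 1, 2, 3);
            // interval '1-2' year to month = 14 months
            System.out.println(ts.plusMonths(14)); // 2002-03-01T01:02:03
        }
    }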
 PREHOOK: query: -- day to second expressions in predicate
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second
@@ -1275,7 +1519,7 @@ where
 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- day to second expressions in predicate
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second
@@ -1300,6 +1544,10 @@ where
   and ts > dt - interval '0 1:2:4' day to second
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1311,18 +1559,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: DateColAddIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:03.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: DateColSubtractIntervalDayTimeScalar(col 1, val 0 01:02:04.000000000) -> 6:timestamp) -> boolean) -> boolean
              predicate: ((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000)) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0) and (ts = (dt + 0 01:02:03.000000000)) and (ts <> (dt + 0 01:02:04.000000000)) and (ts <= (dt + 0 01:02:03.000000000)) and (ts < (dt + 0 01:02:04.000000000)) and (ts >= (dt - 0 01:02:03.000000000)) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean)
              Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ts (type: timestamp)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: timestamp)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -1395,7 +1671,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
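In the section just completed, adding a day-time interval to a DATE column promotes the result to a timestamp (DateColAddIntervalDayTimeScalar returns 6:timestamp), which is why dt + interval '0 1:2:3' can be compared directly against the timestamp literal. A small JDK sketch of that promotion (illustrative only):

    import java.time.LocalDate;
    import java.time.LocalDateTime;

    public final class DatePlusDayTimeSketch {
        public static void main(String[] args) {
            // date '2001-01-01' + interval '0 1:2:3' day to second
            LocalDateTime r = LocalDate.of(2001, 1, 1)
                .atStartOfDay()
                .plusSeconds(1 * 3600 + 2 * 60 + 3);
            System.out.println(r); // 2001-01-01T01:02:03, the scalar in the plan
        }
    }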
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = ts + interval '0' day
@@ -1420,7 +1696,7 @@ where
   and ts > ts - interval '1' day
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2001-01-01 01:02:03' = ts + interval '0' day
@@ -1445,6 +1721,10 @@ where
   and ts > ts - interval '1' day
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1456,18 +1736,46 @@ STAGE PLANS:
           TableScan
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03.0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampScalar(col 6, val 2001-01-01 01:02:03.0)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 0 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColNotEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessEqualTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColLessTimestampColumn(col 0, col 6)(children: TimestampColAddIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterEqualTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean, FilterTimestampColGreaterTimestampColumn(col 0, col 6)(children: TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 00:00:00.000000000) -> 6:timestamp) -> boolean) -> boolean
              predicate: ((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000)) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0) and (ts = (ts + 0 00:00:00.000000000)) and (ts <> (ts + 1 00:00:00.000000000)) and (ts <= (ts + 1 00:00:00.000000000)) and (ts < (ts + 1 00:00:00.000000000)) and (ts >= (ts - 1 00:00:00.000000000)) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean)
              Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ts (type: timestamp)
                outputColumnNames: _col0
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0]
                Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: timestamp)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
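Throughout the vector_interval_2 plans above, Reduce Vectorization stays disabled and the reduce sink is non-native for the same reason: the test runs on MR, and the conditions require hive.execution.engine to be tez or spark. A sketch of the kind of configuration check involved — the ConfVars constant name is an assumption here, and this is not the planner's actual code path:

    import org.apache.hadoop.hive.conf.HiveConf;

    public final class EngineCheckSketch {
        public static void main(String[] args) {
            HiveConf conf = new HiveConf();
            // Assumed constant name for "hive.execution.engine".
            String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
            boolean tezOrSpark = engine.equalsIgnoreCase("tez") || engine.equalsIgnoreCase("spark");
            System.out.println("reduce-side vectorization possible: " + tezOrSpark);
        }
    }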
diff --git ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
index ff16b3b..55f889c 100644
--- ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
+++ ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
@@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps)
 POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ]
 tsval	tsval
 PREHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -49,7 +49,7 @@ from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -62,6 +62,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -73,16 +77,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7]
+                 selectExpressions: DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: date)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date)
@@ -179,7 +208,7 @@ dateval	c1	c2	c3	c4	c5	c6
 9075-06-13	9073-04-13	9077-08-13	9077-08-13	9073-04-13	9073-04-13	9077-08-13
 9209-11-11	9207-09-11	9212-01-11	9212-01-11	9207-09-11	9207-09-11	9212-01-11
 9403-01-09	9400-11-09	9405-03-09	9405-03-09	9400-11-09	9400-11-09	9405-03-09
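The golden rows above are plain calendar month arithmetic: subtracting interval '2-2' (26 months) from a date keeps the day-of-month and shifts the month. A JDK check against the first row (a sketch of the semantics, not Hive's DateColSubtractIntervalYearMonthScalar implementation):

    import java.time.LocalDate;

    public final class DateMinusYearMonthSketch {
        public static void main(String[] args) {
            LocalDate d = LocalDate.of(9075, 6, 13);
            // interval '2-2' year to month = 26 months
            System.out.println(d.minusMonths(26)); // 9073-04-13, column c1 above
            System.out.println(d.plusMonths(26));  // 9077-08-13, column c2 above
        }
    }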
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dateval,
   dateval - date '1999-06-07',
@@ -188,7 +217,7 @@ select
 from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dateval,
   dateval - date '1999-06-07',
@@ -198,6 +227,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -209,16 +242,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time)
              outputColumnNames: _col0, _col1, _col2, _col3
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 2, 3, 4]
+                 selectExpressions: DateColSubtractDateScalar(col 0, val 1999-06-07 00:00:00.0) -> 2:timestamp, DateScalarSubtractDateColumn(val 1999-06-07 00:00:00.0, col 0) -> 3:timestamp, DateColSubtractDateColumn(col 0, col 0) -> 4:timestamp
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: date)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
@@ -309,7 +367,7 @@ dateval	c1	c2	c3
 9075-06-13	2584462 00:00:00.000000000	-2584462 00:00:00.000000000	0 00:00:00.000000000
 9209-11-11	2633556 01:00:00.000000000	-2633556 01:00:00.000000000	0 00:00:00.000000000
 9403-01-09	2704106 01:00:00.000000000	-2704106 01:00:00.000000000	0 00:00:00.000000000
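Date-minus-date yields a day-time interval whose day count is the calendar distance between the two dates; the first row above checks out against plain epoch-day arithmetic. (The stray 01:00:00 hour component in the other rows appears to be a timezone/DST artifact of computing the difference through local timestamps — an observation about the golden output, not something this sketch reproduces.)

    import java.time.LocalDate;
    import java.time.temporal.ChronoUnit;

    // Sketch of the day-count semantics behind DateColSubtractDateScalar.
    public final class DateDifferenceSketch {
        public static void main(String[] args) {
            long days = ChronoUnit.DAYS.between(LocalDate.of(1999, 6, 7),
                                                LocalDate.of(9075, 6, 13));
            System.out.println(days); // 2584462, matching "2584462 00:00:00.000000000"
            // dateval - dateval is always the zero interval, 0 00:00:00.000000000.
        }
    }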
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   tsval,
   tsval - interval '2-2' year to month,
@@ -321,7 +379,7 @@ select
 from interval_arithmetic_1
 order by tsval
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   tsval,
   tsval - interval '2-2' year to month,
@@ -334,6 +392,10 @@ from interval_arithmetic_1
 order by tsval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -345,16 +407,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: timestamp)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7]
+                 selectExpressions: TimestampColSubtractIntervalYearMonthScalar(col 1, val 2-2) -> 2:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 1, val -2-2) -> 3:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val 2-2) -> 4:timestamp, TimestampColAddIntervalYearMonthScalar(col 1, val -2-2) -> 5:timestamp, IntervalYearMonthScalarAddTimestampColumn(val -2-2, col 1) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 2-2, col 1) -> 7:timestamp
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: timestamp)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp)
@@ -451,7 +538,7 @@ tsval	c1	c2	c3	c4	c5	c6
 9075-06-13 16:20:09.218517797	9073-04-13 16:20:09.218517797	9077-08-13 16:20:09.218517797	9077-08-13 16:20:09.218517797	9073-04-13 16:20:09.218517797	9073-04-13 16:20:09.218517797	9077-08-13 16:20:09.218517797
 9209-11-11 04:08:58.223768453	9207-09-11 05:08:58.223768453	9212-01-11 04:08:58.223768453	9212-01-11 04:08:58.223768453	9207-09-11 05:08:58.223768453	9207-09-11 05:08:58.223768453	9212-01-11 04:08:58.223768453
 9403-01-09 18:12:33.547	9400-11-09 18:12:33.547	9405-03-09 18:12:33.547	9405-03-09 18:12:33.547	9400-11-09 18:12:33.547	9400-11-09 18:12:33.547	9405-03-09 18:12:33.547
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   interval '2-2' year to month + interval '3-3' year to month,
   interval '2-2' year to month - interval '3-3' year to month
@@ -459,7 +546,7 @@ from interval_arithmetic_1
 order by interval '2-2' year to month + interval '3-3' year to month
 limit 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   interval '2-2' year to month + interval '3-3' year to month,
   interval '2-2' year to month - interval '3-3' year to month
@@ -468,6 +555,10 @@ order by interval '2-2' year to month + interval '3-3' year to month
 limit 2
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -479,13 +570,37 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: []
              Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                sort order:
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                TopN Hash Memory Usage: 0.1
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month)
@@ -530,7 +645,7 @@ c0	c1
 5-5	-1-1
 5-5	-1-1
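Both output columns above are constant-folded before execution: 2-2 and 3-3 are 26 and 39 months, so the sum is 65 months (5-5) and the difference is -13 months (-1-1). The arithmetic, as a standalone sketch:

    public final class YearMonthFoldingSketch {
        public static void main(String[] args) {
            int a = 2 * 12 + 2; // interval '2-2' -> 26 months
            int b = 3 * 12 + 3; // interval '3-3' -> 39 months
            int sum = a + b;    // 65 months
            int diff = a - b;   // -13 months
            System.out.printf("%d-%d%n", sum / 12, sum % 12); // 5-5
            System.out.printf("-%d-%d%n",
                Math.abs(diff) / 12, Math.abs(diff) % 12);    // -1-1
        }
    }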
 PREHOOK: query: -- interval day-time arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '99 11:22:33.123456789' day to second,
@@ -543,7 +658,7 @@ from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval day-time arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '99 11:22:33.123456789' day to second,
@@ -556,6 +671,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -567,16 +686,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7]
+                 selectExpressions: DateColSubtractIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 2:timestamp, DateColSubtractIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 3:timestamp, DateColAddIntervalDayTimeScalar(col 0, val 99 11:22:33.123456789) -> 4:timestamp, DateColAddIntervalDayTimeScalar(col 0, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddDateColumn(val -99 11:22:33.123456789, col 0) -> 6:timestamp, IntervalDayTimeScalarAddDateColumn(val 99 11:22:33.123456789, col 0) -> 7:timestamp
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: date)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp)
@@ -673,7 +817,7 @@ dateval	_c1	_c2	_c3	_c4	_c5	_c6
 9075-06-13	9075-03-05 11:37:26.876543211	9075-09-20 11:22:33.123456789	9075-09-20 11:22:33.123456789	9075-03-05 11:37:26.876543211	9075-03-05 11:37:26.876543211	9075-09-20 11:22:33.123456789
 9209-11-11	9209-08-03 13:37:26.876543211	9210-02-18 11:22:33.123456789	9210-02-18 11:22:33.123456789	9209-08-03 13:37:26.876543211	9209-08-03 13:37:26.876543211	9210-02-18 11:22:33.123456789
 9403-01-09	9402-10-01 13:37:26.876543211	9403-04-18 12:22:33.123456789	9403-04-18 12:22:33.123456789	9402-10-01 13:37:26.876543211	9402-10-01 13:37:26.876543211	9403-04-18 12:22:33.123456789
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dateval,
   tsval,
   dateval - tsval,
@@ -683,7 +827,7 @@ select
 from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dateval,
   tsval,
@@ -694,6 +838,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -705,16 +853,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 1, 2, 3, 4]
+                 selectExpressions: DateColSubtractTimestampColumn(col 0, col 1) -> 2:interval_day_time, TimestampColSubtractDateColumn(col 1, col 0) -> 3:interval_day_time, TimestampColSubtractTimestampColumn(col 1, col 1) -> 4:interval_day_time
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: date)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time)
@@ -807,7 +980,7 @@ dateval	tsval	c2	c3	c4
 9075-06-13	9075-06-13 16:20:09.218517797	-0 16:20:09.218517797	0 16:20:09.218517797	0 00:00:00.000000000
 9209-11-11	9209-11-11 04:08:58.223768453	-0 04:08:58.223768453	0 04:08:58.223768453	0 00:00:00.000000000
 9403-01-09	9403-01-09 18:12:33.547	-0 18:12:33.547000000	0 18:12:33.547000000	0 00:00:00.000000000
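Date-minus-timestamp treats the date as midnight of that day, so the result is just the time-of-day portion, negated when the date is the minuend. A JDK sketch against the first row above (illustrative of the semantics, not Hive's TimestampColSubtractDateColumn code):

    import java.time.Duration;
    import java.time.LocalDate;
    import java.time.LocalDateTime;

    public final class DateTimestampDifferenceSketch {
        public static void main(String[] args) {
            LocalDate dateval = LocalDate.of(9075, 6, 13);
            LocalDateTime tsval = LocalDateTime.of(9075, 6, 13, 16, 20, 9, 218517797);
            Duration d = Duration.between(dateval.atStartOfDay(), tsval);
            System.out.println(d); // PT16H20M9.218517797S, i.e. 0 16:20:09.218517797
            // dateval - tsval is the negation: -0 16:20:09.218517797
        }
    }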
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   tsval,
   tsval - interval '99 11:22:33.123456789' day to second,
@@ -819,7 +992,7 @@ select
 from interval_arithmetic_1
 order by tsval
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   tsval,
   tsval - interval '99 11:22:33.123456789' day to second,
@@ -832,6 +1005,10 @@ from interval_arithmetic_1
 order by tsval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -843,16 +1020,41 @@ STAGE PLANS:
           TableScan
             alias: interval_arithmetic_1
             Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
              expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7]
+                 selectExpressions: TimestampColSubtractIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 2:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 3:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val 99 11:22:33.123456789) -> 4:timestamp, TimestampColAddIntervalDayTimeScalar(col 1, val -99 11:22:33.123456789) -> 5:timestamp, IntervalDayTimeScalarAddTimestampColumn(val -99 11:22:33.123456789, col 1) -> 6:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 99 11:22:33.123456789, col 1) -> 7:timestamp
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: timestamp)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp)
      Execution mode: vectorized
+     Map Vectorization:
+         enabled: true
+         enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+         groupByVectorOutput: true
+         inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+         allNative: false
+         usesVectorUDFAdaptor: false
+         vectorized: true
+     Reduce Vectorization:
+         enabled: false
+         enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+         enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp)
@@ -949,14 +1151,14 @@ tsval	_c1	_c2	_c3	_c4	_c5	_c6
 9075-06-13 16:20:09.218517797	9075-03-06 03:57:36.095061008	9075-09-21 03:42:42.341974586	9075-09-21 03:42:42.341974586	9075-03-06 03:57:36.095061008	9075-03-06 03:57:36.095061008	9075-09-21 03:42:42.341974586
 9209-11-11 04:08:58.223768453	9209-08-03 17:46:25.100311664	9210-02-18 15:31:31.347225242	9210-02-18 15:31:31.347225242	9209-08-03 17:46:25.100311664	9209-08-03 17:46:25.100311664	9210-02-18 15:31:31.347225242
 9403-01-09 18:12:33.547	9402-10-02 07:50:00.423543211	9403-04-19 06:35:06.670456789	9403-04-19 06:35:06.670456789	9402-10-02 07:50:00.423543211	9402-10-02 07:50:00.423543211	9403-04-19 06:35:06.670456789
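The next plan folds two day-time interval literals into the ConstantVectorExpression values 109 20:30:40.246913578 and 89 02:14:26.000000000 shown below. The arithmetic checks out with plain nanosecond math; this is a sketch of the computation, not Hive's folding code:

    public final class DayTimeFoldingSketch {
        public static void main(String[] args) {
            long nanosPerSecond = 1_000_000_000L;
            // '99 11:22:33.123456789' and '10 9:8:7.123456789' as total nanos
            long a = (99 * 86400L + 11 * 3600 + 22 * 60 + 33) * nanosPerSecond + 123456789;
            long b = (10 * 86400L + 9 * 3600 + 8 * 60 + 7) * nanosPerSecond + 123456789;
            System.out.println(format(a + b)); // 109 20:30:40.246913578
            System.out.println(format(a - b)); // 89 02:14:26.000000000
        }

        static String format(long totalNanos) {
            long s = totalNanos / 1_000_000_000L;
            long n = totalNanos % 1_000_000_000L;
            return String.format("%d %02d:%02d:%02d.%09d",
                s / 86400, (s % 86400) / 3600, (s % 3600) / 60, s % 60, n);
        }
    }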
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out index 13a1bac..d21a813 100644 --- ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out @@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -158,7 +158,7 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select v1.s, v2.s, @@ -180,6 +180,10 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -215,12 +219,24 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 14] + selectExpressions: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -228,20 +244,40 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) + Map Join Vectorization: + className: 
VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_join30.q.out ql/src/test/results/clientpositive/vector_join30.q.out index 45ed894..984af87 100644 --- ql/src/test/results/clientpositive/vector_join30.q.out +++ ql/src/test/results/clientpositive/vector_join30.q.out @@ -14,7 +14,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -22,7 +22,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -30,6 +30,10 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 @@ -49,18 +53,46 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data 
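In the map-join plan above, every native condition is met except the engine check (hive.execution.engine mr IN [tez, spark] IS false), so the join falls back to the non-native VectorMapJoinOperator rather than a fully native vectorized map join. A hedged sketch of the switches named in that plan (the tez value is illustrative, not part of this diff):

    set hive.vectorized.execution.mapjoin.native.enabled=true;  -- already met in the plan above
    set hive.execution.engine=tez;  -- clears the one unmet native condition under MR
    -- re-running the same explain vectorization expression query should then report native: true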
size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -113,6 +145,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -124,6 +160,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -175,6 +219,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -221,19 +269,47 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -271,7 +347,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -279,7 +355,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -287,6 +363,10 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 @@ -304,15 +384,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -365,6 +469,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -376,6 +484,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -433,16 +549,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -480,7 +620,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -488,7 +628,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ 
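Throughout these MR plans the same pattern repeats: Reduce Vectorization reports enabled: false even though hive.vectorized.execution.reduce.enabled is met, because reduce-side vectorization additionally requires Tez or Spark, and the Reduce Sink likewise stays non-native (the engine check plus Uniform Hash IS false). A sketch of probing the same query on a qualifying engine, using only the settings named in the plan (tez chosen for illustration):

    set hive.execution.engine=tez;
    set hive.vectorized.execution.reduce.enabled=true;
    set hive.vectorized.execution.reducesink.new.enabled=true;
    explain vectorization expression
    from
      (select orcsrc.* from orcsrc sort by key) x
    join
      (select orcsrc.* from orcsrc sort by key) y
    on (x.key = y.key)
    select sum(hash(y.key, y.value));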
-496,6 +636,10 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 @@ -513,15 +657,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -574,6 +742,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -585,6 +757,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -642,16 +822,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -689,7 +893,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -700,7 +904,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -711,6 +915,10 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-9 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-10, Stage-11, Stage-12, Stage-2 @@ -733,18 +941,46 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -810,6 +1046,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -821,6 +1061,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -885,6 +1133,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -938,6 +1190,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -992,19 +1248,47 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1023,19 +1307,47 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1079,7 +1391,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -1090,7 +1402,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -1101,6 +1413,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2 @@ -1121,15 +1437,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1195,6 +1535,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1206,6 +1550,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1270,6 +1622,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1324,16 +1680,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1352,16 +1732,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1405,7 +1809,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1416,7 +1820,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1427,6 +1831,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1445,15 +1853,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1519,6 +1951,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1530,6 +1966,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1595,16 +2039,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1623,16 +2091,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1676,7 +2168,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM 
orcsrc sort by key) x LEFT OUTER JOIN @@ -1687,7 +2179,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1698,6 +2190,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1716,15 +2212,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1790,6 +2310,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1801,6 +2325,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1866,16 +2398,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1894,16 +2450,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1947,7 +2527,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ 
-1958,7 +2538,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1969,6 +2549,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1987,15 +2571,39 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -2061,6 +2669,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -2072,6 +2684,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -2137,16 +2757,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select 
Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -2165,16 +2809,40 @@ STAGE PLANS: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/vector_join_part_col_char.q.out index 6f6efc6..a9a42f3 100644 --- ql/src/test/results/clientpositive/vector_join_part_col_char.q.out +++ 
ql/src/test/results/clientpositive/vector_join_part_col_char.q.out @@ -97,10 +97,14 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -135,6 +139,13 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: char(50)) Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 644b0dc..5e88346 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -92,8 +96,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index 67725bd..8c0a763 100644 --- 
ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -80,12 +80,16 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -169,12 +173,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -258,12 +266,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -299,9 +311,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -312,20 +331,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, 
Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -348,12 +387,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -389,9 +432,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -402,20 +452,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) 
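
For context on the repeated plans in vector_left_outer_join2.q.out: the explains above and below are the same query re-run under different session settings, and the nativeConditionsMet / nativeConditionsNotMet lists name exactly the settings being toggled. A minimal sketch, assuming the .q file flips the vectorization and native map join flags between runs (the query text is verbatim from this file):

  -- With vectorization on but native map join off, the plan reports
  -- hive.vectorized.execution.mapjoin.native.enabled under
  -- nativeConditionsNotMet and falls back to the non-native
  -- VectorMapJoinOuterFilteredOperator.
  set hive.vectorized.execution.enabled=true;
  set hive.vectorized.execution.mapjoin.native.enabled=false;
  explain vectorization expression
  select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2
  from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );

Even in the later runs where that flag is true, the operator stays non-native here because the "hive.execution.engine mr IN [tez, spark]" condition is never met on MapReduce.
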
outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -438,12 +508,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -479,9 +553,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -492,20 +573,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -528,12 +629,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -569,9 +674,16 @@ STAGE PLANS: TableScan alias: tjoin1 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -582,20 +694,40 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map 
Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out index 485e352..a0dd1d9 100644 --- ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out @@ -132,85 +132,17 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) +PREHOOK: query: explain vectorization only summary - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value PREHOOK: type: QUERY @@ -228,85 +160,15 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -326,85 +188,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: 
explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -416,89 +208,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is 
a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col1 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -521,85 +239,15 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch 
Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -614,89 +262,15 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t3 - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE 
Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -711,179 +285,35 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: 
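
For reference on the rewritten explains in vector_leftsemi_mapjoin.q.out: the old full plans are replaced by the restricted form of the new statement, which is why the STAGE DEPENDENCIES and STAGE PLANS bodies are deleted wholesale. A minimal sketch of the statement as these golden files use it (t1/t2/t3 are the test's own tables; SUMMARY and EXPRESSION are the levels seen in this patch, with OPERATOR and DETAIL the other documented levels):

  -- ONLY suppresses the regular plan body and prints just the
  -- vectorization annotations; SUMMARY limits those to the per-plan,
  -- per-Map, and per-Reduce summaries.
  explain vectorization only summary
  select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;

With hive.vectorized.execution.enabled=false, as in this file, that prints only the PLAN VECTORIZATION header with enabled: false and the failed condition.
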
- b:t2 - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - 
predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -900,85 +330,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ 
a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -1009,202 +369,39 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
[hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: 
+ enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -1223,607 +420,102 @@ POSTHOOK: Input: default@t3 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 -#### A masked pattern 
was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.key from t3 a 
left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 
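(Editor's sketch, not part of the generated q.out file: the hunks above and below replace plain EXPLAIN plans with the new EXPLAIN VECTORIZATION output. As a rough usage illustration — table names t1/t2 and the session settings simply mirror statements already recorded in this output — the modes exercised by this suite can be reproduced roughly like this:)

-- Vectorization disabled: the PLAN VECTORIZATION header reports the unmet
-- condition, e.g. enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false].
set hive.vectorized.execution.enabled=false;
explain vectorization only summary
select * from t1 a left semi join t2 b on a.key = b.key sort by a.key, a.value;

-- Vectorization enabled: the "only operator" mode adds per-operator detail
-- such as VectorMapJoinOperator and its native/non-native condition lists.
set hive.vectorized.execution.enabled=true;
explain vectorization only operator
select * from t1 a left semi join t2 b on a.key = b.key sort by a.key, a.value;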
+PREHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -1851,109 +543,27 @@ POSTHOOK: Input: 
default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on 
a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -1988,98 +598,119 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File 
Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +16 +18 +20 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -2135,104 +766,15 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: 
type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -2275,86 +817,21 @@ POSTHOOK: Input: default@t3 10 10 16 -18 -20 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map 
Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +18 +20 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -2366,10 +843,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2462,10 +945,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: 
query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2560,10 +1049,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2650,10 +1145,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2755,10 +1256,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2848,10 +1355,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select 
key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2945,10 +1458,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -3039,10 +1558,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -3134,10 +1659,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -3243,10 +1774,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort 
by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -3337,10 +1874,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -3466,10 +2009,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -3570,10 +2119,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -3703,10 +2258,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi 
join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -3830,10 +2391,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3962,10 +2529,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -4092,10 +2665,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -4224,10 +2803,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer 
join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4369,10 +2954,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -4515,10 +3106,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -4600,10 +3197,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -4612,74 +3215,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -4697,10 +3268,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by 
a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -4709,74 +3286,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -4796,10 +3341,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -4808,74 +3359,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When 
Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -4887,10 +3406,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -4899,78 +3424,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col1 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -4993,10 +3485,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value 
+PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -5005,74 +3503,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -5087,10 +3553,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -5099,78 +3571,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t3 - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -5185,10 +3624,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only operator +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -5197,78 +3642,45 @@ STAGE 
DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t2 - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File 
Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -5280,86 +3692,60 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS 
true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5376,10 +3762,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -5388,74 +3780,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: 
_col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -5486,10 +3846,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain 
vectorization only operator +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -5498,74 +3864,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5581,10 +3915,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -5593,97 +3933,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num 
rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5711,86 +3999,60 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only operator +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-0 depends on stages: Stage-2 -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -5816,10 +4078,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -5828,101 +4096,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic 
stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -5950,10 +4159,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -5962,83 +4177,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6059,115 +4230,68 @@ POSTHOOK: Input: default@t3 0 0 0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: 
COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6210,10 +4334,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -6222,83 +4352,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - 
TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6341,10 +4427,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -6353,83 +4445,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6466,104 +4514,57 @@ POSTHOOK: Input: default@t3 10 10 4 -4 -8 -8 -NULL -NULL -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE +4 +8 +8 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6619,10 +4620,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -6631,93 +4638,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) Stage: 
Stage-3 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Execution mode: vectorized - Local Work: - Map Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer 
join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -6766,10 +4727,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only operator +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -6778,69 +4745,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Execution mode: vectorized - Local Work: - Map 
Reduce Local Work + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -6852,10 +4788,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -6896,7 +4838,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -6905,15 +4854,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + 
enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -6949,10 +4925,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -6993,7 +4975,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7002,15 +4991,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: 
string) @@ -7048,10 +5064,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7092,7 +5114,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7101,15 +5130,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7139,10 +5195,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value 
+PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7183,7 +5245,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7192,19 +5261,50 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -7245,10 +5345,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: 
query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7289,7 +5395,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7298,15 +5411,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7339,10 +5479,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: 
QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7383,7 +5529,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7392,19 +5545,50 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -7437,10 +5621,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization detail +select a.value from t1 a left semi 
join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7481,7 +5671,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7490,19 +5687,50 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -7532,10 +5760,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### 
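Note (illustrative sketch, not part of the generated q.out output): the plans above repeatedly report the same gating settings under enabledConditionsMet, nativeConditionsMet, and nativeConditionsNotMet. Assuming a Hive session over the same t1/t2 test tables, the following shows how those session knobs map onto the reported conditions; the setting names, engine values, and the explain vectorization syntax are taken verbatim from the plan output above, and the specific values chosen here are only an example.

    -- gates "PLAN VECTORIZATION: enabled" in the plans above
    set hive.vectorized.execution.enabled=true;
    -- listed under Map Join nativeConditionsNotMet when false
    set hive.vectorized.execution.mapjoin.native.enabled=true;
    -- the native map join and reduce sink paths also require tez or spark,
    -- per "hive.execution.engine mr IN [tez, spark] IS false"
    set hive.execution.engine=tez;
    explain vectorization detail
    select * from t1 a left semi join t2 b on a.key = b.key sort by a.key, a.value;

With the engine left at mr, as in these golden files, the same query reports VectorMapJoinOperator and VectorReduceSinkOperator with native: false, which is exactly the pattern shown in the hunks above and below.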
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7576,7 +5810,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7585,15 +5826,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7628,10 +5896,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key 
sort by a.key
+PREHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-1 depends on stages: Stage-3
@@ -7672,7 +5946,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -7681,15 +5962,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0
                 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -7738,10 +6046,16 @@ POSTHOOK: Input: default@t3
 8
 8
 9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -7782,7 +6096,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -7791,15 +6112,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 (2 * _col0) (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -7833,10 +6181,16 @@ POSTHOOK: Input: default@t2
 0 val_0
 0 val_0
 8 val_8
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -7893,7 +6247,14 @@ STAGE PLANS:
           TableScan
             alias: b
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -7904,20 +6265,52 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
                 outputColumnNames: _col0, _col1, _col5, _col6
                 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: string)
                     sort order: ++
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                     Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col2 (type: int), _col3 (type: string)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: String
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
@@ -7963,10 +6356,16 @@ POSTHOOK: Input: default@t3
 10 val_10 10 val_5
 4 val_4 4 val_2
 8 val_8 8 val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -8007,7 +6406,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
               predicate: (key is not null and value is not null) (type: boolean)
               Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -8016,15 +6422,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int), value (type: string)
                   1 _col0 (type: int), _col1 (type: string)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -8068,10 +6501,16 @@ POSTHOOK: Input: default@t3
 5 val_5
 8 val_8
 9 val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-1 depends on stages: Stage-4
@@ -8137,7 +6576,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -8148,15 +6594,42 @@ STAGE PLANS:
                   0 key (type: int)
                   1 _col0 (type: int)
                   2 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
                 outputColumnNames: _col0
                 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8202,10 +6675,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8256,6 +6735,9 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Map Join Operator
               condition map:
                    Left Outer Join0 to 1
@@ -8264,15 +6746,42 @@ STAGE PLANS:
                 0 key (type: int)
                 1 key (type: int)
                 2 _col0 (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                  nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
              outputColumnNames: _col0
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8330,10 +6839,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -8376,6 +6891,13 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -8402,6 +6924,14 @@ STAGE PLANS:
               key expressions: _col0 (type: int)
               sort order: +
               Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8462,10 +6992,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8516,6 +7052,9 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Map Join Operator
              condition map:
                   Left Semi Join 0 to 1
@@ -8524,15 +7063,42 @@ STAGE PLANS:
                0 key (type: int)
                1 _col0 (type: int)
                2 key (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                  nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
              outputColumnNames: _col0
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8593,10 +7159,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8647,6 +7219,9 @@ STAGE PLANS:
           TableScan
             alias: c
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Map Join Operator
              condition map:
                   Left Semi Join 0 to 1
@@ -8655,15 +7230,43 @@ STAGE PLANS:
                0 key (type: int)
                1 _col0 (type: int)
                2 key (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                  nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
              outputColumnNames: _col0
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: bigint
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8726,10 +7329,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -8772,6 +7381,13 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: key (type: int)
                 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -8798,6 +7414,14 @@ STAGE PLANS:
               key expressions: _col0 (type: int)
              sort order: +
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -8871,10 +7495,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-8 is a root stage
   Stage-3 depends on stages: Stage-8
@@ -8926,7 +7556,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -8935,6 +7572,11 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
@@ -8943,15 +7585,42 @@ STAGE PLANS:
                   keys:
                     0 _col1 (type: string)
                     1 value (type: string)
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                   outputColumnNames: _col0
                   Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                     Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -9018,10 +7687,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -9062,12 +7737,23 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
               predicate: ((key > 100) and value is not null) (type: boolean)
               Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
                 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -9075,16 +7761,37 @@ STAGE PLANS:
                   keys:
                     0 _col1 (type: string)
                     1 _col0 (type: string)
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
                   outputColumnNames: _col0
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -9104,10 +7811,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9148,7 +7861,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9157,15 +7877,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -9201,10 +7948,16 @@ POSTHOOK: Input: default@t2
 10 val_10
 4 val_4
 8 val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9245,7 +7998,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9254,15 +8014,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -9300,10 +8087,16 @@ POSTHOOK: Input: default@t2
 10 val_5
 4 val_2
 8 val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9344,7 +8137,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9353,15 +8153,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
        Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -9391,10 +8218,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9435,7 +8268,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9444,19 +8284,50 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col1 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string)
                   outputColumnNames: _col0
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0]
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                     Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -9497,10 +8368,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9541,7 +8418,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9550,15 +8434,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -9591,10 +8502,16 @@ POSTHOOK: Input: default@t2
 0 val_0
 0 val_0
 0 val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9635,7 +8552,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9644,19 +8568,50 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string)
                   outputColumnNames: _col0
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0]
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                     Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -9689,10 +8644,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9733,7 +8694,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9742,19 +8710,50 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string)
                   outputColumnNames: _col0
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0]
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                     Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -9784,10 +8783,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9828,7 +8833,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9837,15 +8849,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -9880,10 +8919,16 @@ POSTHOOK: Input: default@t2
 10 val_5
 4 val_2
 8 val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-1 depends on stages: Stage-3
@@ -9924,7 +8969,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -9933,15 +8985,42 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0
                 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -9990,10 +9069,16 @@ POSTHOOK: Input: default@t3
 8
 8
 9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -10034,7 +9119,14 @@ STAGE PLANS:
          TableScan
            alias: a
            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -10043,15 +9135,42 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 (2 * _col0) (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: string)
                  sort order: ++
+                  Reduce Sink Vectorization:
+                      className:
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -10085,10 +9204,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -10145,7 +9270,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10156,20 +9288,52 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 
3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -10215,10 +9379,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -10259,7 +9429,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10268,15 +9445,42 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no 
Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -10320,10 +9524,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -10389,7 +9599,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10400,15 +9617,42 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -10454,10 +9698,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -10508,6 +9758,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -10516,15 +9769,42 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -10582,10 +9862,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -10628,6 +9914,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -10654,6 +9947,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -10714,10 +10015,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: 
query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -10768,6 +10075,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Semi Join 0 to 1 @@ -10776,15 +10086,42 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -10845,10 +10182,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
  Stage-6 is a root stage
  Stage-2 depends on stages: Stage-6
@@ -10899,6 +10242,9 @@ STAGE PLANS:
           TableScan
             alias: c
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Map Join Operator
               condition map:
                    Left Semi Join 0 to 1
@@ -10907,15 +10253,43 @@ STAGE PLANS:
                 0 key (type: int)
                 1 _col0 (type: int)
                 2 key (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false
              outputColumnNames: _col0
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: bigint
      Local Work:
        Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: int)
@@ -10978,10 +10352,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -11024,6 +10404,13 @@ STAGE PLANS:
               sort order: +
               Map-reduce partition columns: key (type: int)
               Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -11050,6 +10437,14 @@ STAGE PLANS:
               key expressions: _col0 (type: int)
              sort order: +
              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: int)
@@ -11123,10 +10518,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-8 is a root stage
   Stage-3 depends on stages: Stage-8
@@ -11178,7 +10579,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11187,6 +10595,11 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -11195,15 +10608,42 @@ STAGE PLANS:
                 keys:
                   0 _col1 (type: string)
                   1 value (type: string)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                outputColumnNames: _col0
                Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -11270,10 +10710,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -11314,12 +10760,23 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-           Filter Operator
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
+           Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
              predicate: ((key > 100) and value is not null) (type: boolean)
              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: int), value (type: string)
                outputColumnNames: _col0, _col1
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 1]
               Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
                 condition map:
@@ -11327,16 +10784,37 @@ STAGE PLANS:
                 keys:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                outputColumnNames: _col0
                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
@@ -11356,10 +10834,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11400,7 +10884,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11409,15 +10900,42 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int), _col1 (type: string)
                 sort order: ++
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -11453,10 +10971,16 @@ POSTHOOK: Input: default@t2
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11497,7 +11021,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11506,15 +11037,42 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int), _col1 (type: string)
                sort order: ++
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -11552,10 +11110,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11596,7 +11160,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11605,15 +11176,42 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int), _col1 (type: string)
                sort order: ++
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -11643,10 +11241,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11687,7 +11291,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11696,19 +11307,50 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col1 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col1 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -11749,10 +11391,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11793,7 +11441,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11802,15 +11457,42 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: _col0 (type: int), _col1 (type: string)
                sort order: ++
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -11843,10 +11525,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 0	val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11887,7 +11575,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11896,19 +11591,50 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col1 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:string
+              partitionColumnCount: 0
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -11941,10 +11667,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -11985,7 +11717,14 @@ STAGE PLANS:
           TableScan
             alias: a
             Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
            Filter Operator
+             Filter Vectorization:
+                 className: VectorFilterOperator
+                 native: true
+                 predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
@@ -11994,19 +11733,50 @@ STAGE PLANS:
                keys:
                  0 key (type: int)
                  1 _col0 (type: int)
+               Map Join Vectorization:
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col1
               Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col1 (type: string)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 
2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -12036,10 +11806,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12080,7 +11856,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12089,15 +11872,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + 
partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12132,10 +11942,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -12176,7 +11992,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12185,15 +12008,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12242,10 +12092,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12286,7 +12142,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12295,15 +12158,42 @@ STAGE PLANS: keys: 0 key (type: int) 1 (2 * _col0) (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12337,10 +12227,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on 
b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -12397,7 +12293,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12408,20 +12311,52 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: String Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine 
mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -12467,10 +12402,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12511,7 +12452,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12520,15 +12468,42 @@ STAGE PLANS: keys: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12572,10 +12547,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -12641,7 +12622,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12652,15 +12640,42 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12706,10 +12721,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -12760,6 +12781,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Outer Join0 to 1 @@ -12768,15 +12792,42 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12834,10 +12885,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 
b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -12880,6 +12937,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -12906,6 +12970,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12966,10 +13038,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -13020,6 +13098,9 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Semi Join 0 to 1 @@ -13028,15 +13109,42 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13097,10 +13205,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -13151,6 +13265,9 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Left Semi Join 0 to 1 @@ -13159,15 +13276,43 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13230,10 +13375,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -13276,6 +13427,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -13302,6 +13460,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13375,10 +13541,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain 
vectorization detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -13430,7 +13602,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13439,6 +13618,11 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -13447,15 +13631,42 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 value (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13522,10 +13733,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select 
a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -13566,12 +13783,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0, val 100) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -13579,16 +13807,37 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index 160b088..6903ab5 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query 
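The vector_mapjoin_reduce.q.out hunks that follow repeat the pattern of the earlier files: plain `explain` becomes `explain vectorization expression`, and the golden output grows PLAN VECTORIZATION, Map Vectorization, and Reduce Vectorization sections. For orientation, a minimal sketch of the three statement levels these golden files exercise — the table name `example_orc` is hypothetical, and the level descriptions are inferred from the output shapes in this patch:

    set hive.vectorized.execution.enabled=true;

    -- summary: PLAN VECTORIZATION plus per-vertex Map/Reduce enablement
    explain vectorization
    select key, count(*) from example_orc group by key;

    -- adds per-operator sections with vector expressions
    -- (predicateExpression, projectedOutputColumns, ...)
    explain vectorization expression
    select key, count(*) from example_orc group by key;

    -- additionally prints rowBatchContext: data columns, included
    -- columns, partition column count, scratch column types
    explain vectorization detail
    select key, count(*) from example_orc group by key;

Each level appears to include everything the previous one prints, which is why the size of the golden-file additions below tracks the level requested.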
-explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 @@ -54,6 +58,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -107,6 +119,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -147,6 +163,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -242,6 +262,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -276,19 +300,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization expression select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p 
join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 @@ -321,6 +349,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -374,6 +410,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -414,6 +454,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -509,6 +553,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out index 8c1acc8..be7e154 100644 --- ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out @@ -211,7 +211,7 @@ POSTHOOK: Output: default@store PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -226,7 +226,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. 
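This golden file encodes a negative expectation. Under MR, the shuffle-join plan reads both tables in a single map phase, so one MapWork holds two TableScanOperators with different schemas, and the vectorizer refuses it with `enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false`, as the hunks below show. A hedged sketch of a query shape that should trigger the same refusal — the table and column names are hypothetical, not the test's:

    set hive.execution.engine=mr;
    set hive.vectorized.execution.enabled=true;

    -- Scans of two differently-shaped tables land in one MapWork on MR,
    -- so Map Vectorization is expected to report enabled: false.
    explain vectorization
    select d.state, count(1)
    from sales_fact f join store_dim d on f.store_id = d.store_id
    group by d.state;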
-explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -238,6 +238,10 @@ explain select order by s_state limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -281,6 +285,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -322,6 +333,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -359,6 +377,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -383,6 +409,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_multi_insert.q.out ql/src/test/results/clientpositive/vector_multi_insert.q.out index e9f106d..4013cd4 100644 --- ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc1 POSTHOOK: Output: default@orc1 -PREHOOK: query: explain from orc1 a +PREHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 PREHOOK: type: QUERY -POSTHOOK: query: explain 
from orc1 a +POSTHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 @@ -152,6 +156,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-9 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out index 8845cb2..d9591d0 100644 --- ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out +++ ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,6 +31,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. 
Column type is int but the common type is bigint + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_non_string_partition.q.out ql/src/test/results/clientpositive/vector_non_string_partition.q.out index 11677b7..1d60bf2 100644 --- ql/src/test/results/clientpositive/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -42,20 +46,48 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4] Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) @@ -98,10 +130,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: 
EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -113,19 +149,47 @@ STAGE PLANS: TableScan alias: non_string_part Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) diff --git ql/src/test/results/clientpositive/vector_null_projection.q.out ql/src/test/results/clientpositive/vector_null_projection.q.out index 779f787..949a928 100644 --- ql/src/test/results/clientpositive/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@b POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. 
-explain +explain vectorization expression select NULL from a PREHOOK: type: QUERY POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. -explain +explain vectorization expression select NULL from a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -66,6 +70,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -82,12 +92,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@a #### A masked pattern was here #### NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select NULL as x from a union distinct select NULL as x from b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -134,6 +148,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: void) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: void) diff --git ql/src/test/results/clientpositive/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/vector_nullsafe_join.q.out index 2090c24..306c665 100644 --- ql/src/test/results/clientpositive/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/vector_nullsafe_join.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -82,27 +86,50 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: 
true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -131,10 +158,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -182,7 +213,14 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -193,20 +231,40 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -226,10 +284,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -271,6 +333,9 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -279,21 +344,41 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -340,10 +425,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -391,7 +480,14 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -402,21 +498,41 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -436,10 +552,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -481,6 +601,9 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -489,21 +612,41 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -629,11 +772,15 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -662,27 +809,50 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -711,10 +881,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain 
vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -762,7 +936,14 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -773,20 +954,40 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -806,10 +1007,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -851,6 +1056,9 @@ STAGE 
PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -859,21 +1067,41 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -920,10 +1148,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -971,7 +1203,14 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE Map Join Operator @@ -982,21 +1221,41 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -1016,10 +1275,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1061,6 +1324,9 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -1069,21 +1335,41 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled 
IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/vector_number_compare_projection.q.out index 4f1a26c..7bfe814 100644 --- ql/src/test/results/clientpositive/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/vector_number_compare_projection.q.out @@ -103,7 +103,7 @@ scratch.t scratch.si scratch.i scratch.b scratch.f scratch.d scratch.dc PREHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q @@ -111,12 +111,16 @@ PREHOOK: type: QUERY POSTHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,16 +132,41 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 7, 8, 9] + selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long Statistics: Num 
rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) @@ -176,17 +205,21 @@ POSTHOOK: Input: default@vectortab2k_orc #### A masked pattern was here #### c0 -3601806268 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -198,16 +231,41 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10] + selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out index 8330810..08cc168 100644 --- ql/src/test/results/clientpositive/vector_nvl.q.out +++ ql/src/test/results/clientpositive/vector_nvl.q.out @@ -1,13 +1,17 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -19,24 +23,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean predicate: cdouble is null (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: null (type: double), 100.0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -68,14 +98,18 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,21 +121,43 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float), NVL(cfloat,1) (type: float) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 13] + selectExpressions: VectorCoalesce(columns [4, 12])(children: col 4, ConstantVectorExpression(val 1.0) -> 12:double) -> 13:float Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -131,33 +187,72 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: 
true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 10 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 10) -> 12:long + Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 10 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT nvl(null, 10) as n FROM alltypesorc @@ -181,33 +276,55 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT 
operator: Data type void of Const void null not supported + vectorized: false + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: SELECT nvl(null, null) as n FROM alltypesorc diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 4840b41..52746ce 100644 --- ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -119,12 +123,26 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 7 + native: false + projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -133,9 +151,26 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -159,6 +194,14 @@ STAGE PLANS: sort order: - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_outer_join0.q.out ql/src/test/results/clientpositive/vector_outer_join0.q.out index 68134f2..04dd6ff 100644 --- ql/src/test/results/clientpositive/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/vector_outer_join0.q.out @@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -100,9 +104,16 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -110,16 +121,38 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: v1:string, a:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, String Local Work: Map Reduce Local Work @@ -149,12 +182,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -187,9 +224,16 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -197,16 +241,38 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c:int, v2:string + partitionColumnCount: 0 + scratchColumnTypeNames: String, bigint Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index c015578..944a1da 100644 --- 
ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -260,9 +264,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -270,16 +281,38 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col2 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, Double, Double, String, String, Timestamp, Timestamp, bigint, bigint Local Work: Map Reduce Local Work @@ -326,18 +359,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -370,9 +407,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -380,16 +424,37 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution 
mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -522,7 +587,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -531,7 +596,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -540,6 +605,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -587,9 +656,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -597,6 +673,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -605,20 +686,53 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 4017 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + 
vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 98c65d4..e0fe9e0 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -291,9 +295,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 
Select Operator expressions: cint (type: int), cbigint (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -301,6 +312,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 22 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -309,20 +325,53 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 24 Data size: 5060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) diff --git 
ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index a6d5c59..f3e40d0 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,104 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map 
Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS 
true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -367,7 +270,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -376,7 +279,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -385,104 +288,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch 
Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or 
DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -508,7 +314,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -517,7 +323,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -526,104 +332,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: bigint), _col3 (type: 
string) - 1 _col0 (type: bigint), _col1 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select 
Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution 
mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index b6def86..d33235e 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -246,79 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: 
string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint","bigint","bigint","Double","Double","String","String","Timestamp","Timestamp","bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select * @@ -391,79 +331,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false 
false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:hd - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select 
Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select c.ctinyint @@ -892,7 +772,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -901,7 +781,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -910,104 +790,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd 
- TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num 
rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.ctinyint diff --git ql/src/test/results/clientpositive/vector_outer_join5.q.out ql/src/test/results/clientpositive/vector_outer_join5.q.out index 4255a94..71db964 100644 --- ql/src/test/results/clientpositive/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/vector_outer_join5.q.out @@ -66,95 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:st - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:st - TableScan - alias: st - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:st":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:st":{"TableScan":{"alias:":"st","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID 
UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -174,101 +100,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{(_col1 = 2)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cmodint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{(_col1 = 2)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: 
COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 1]","dataColumns:":["ctinyint:tinyint","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -288,101 +134,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6058 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map 
Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{((UDFToInteger(_col0) pmod 4) = _col1)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cmodint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 
6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{((UDFToInteger(_col0) pmod 4) = _col1)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 1]","dataColumns:":["ctinyint:tinyint","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -402,101 +168,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6248 -PREHOOK: query: explain 
+PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: 
NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{(_col0 < 100)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{(_col0 < 100)}","1":""},"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -516,7 +202,7 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -525,7 +211,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -534,104 +220,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - $hdt$_2:s2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - $hdt$_2:s2 - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: 
count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:s2":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cbigint (type: bigint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"UDFToLong(_col1) (type: bigint)","1":"(_col0 pmod 8) (type: bigint)"}}}}}}},"$hdt$_2:s2":{"TableScan":{"alias:":"s2","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cmodint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"UDFToLong(_col1) (type: bigint)","1":"(_col0 pmod 8) (type: bigint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition 
IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 1]","dataColumns:":["ctinyint:tinyint","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -717,95 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE 
DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:st - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:st - TableScan - alias: st - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:st":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:st":{"TableScan":{"alias:":"st","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE 
Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0]","dataColumns:":["cmodtinyint:int","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -825,101 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 
sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{(_col1 = 2)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: 
int)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"cmodtinyint (type: int), cmodint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{(_col1 = 2)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 1]","dataColumns:":["cmodtinyint:int","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -939,101 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE 
DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{((_col0 pmod 4) = _col1)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"cmodtinyint (type: int), cmodint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{((_col0 pmod 4) = _col1)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 
1]","dataColumns:":["cmodtinyint:int","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1053,101 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: 
count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"filter predicates:":{"0":"{(_col0 < 3)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"filter predicates:":{"0":"{(_col0 < 3)}","1":""},"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOuterFilteredOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0]","dataColumns:":["cmodtinyint:int","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1167,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1176,7 +451,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1185,104 +460,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - $hdt$_2:s2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - $hdt$_2:s2 - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) 
- 1 _col0 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:sm":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:s2":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:sm":{"TableScan":{"alias:":"sm","Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cbigint (type: bigint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"UDFToLong(_col1) (type: bigint)","1":"(_col0 pmod 8) (type: bigint)"}}}}}}},"$hdt$_2:s2":{"TableScan":{"alias:":"s2","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cmodtinyint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"s","Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1]"},"children":{"Select Operator":{"expressions:":"cmodtinyint (type: int), cmodint (type: 
int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"UDFToLong(_col1) (type: bigint)","1":"(_col0 pmod 8) (type: bigint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"2","includeColumns:":"[0, 1]","dataColumns:":["cmodtinyint:int","cmodint:int"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By 
Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/vector_outer_join6.q.out ql/src/test/results/clientpositive/vector_outer_join6.q.out index 8c09716..8390e2e 100644 --- ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -126,106 +126,15 @@ POSTHOOK: Output: default@TJOIN4 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map 
Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join 
Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -244,102 +153,15 @@ POSTHOOK: Input: default@tjoin3 0 3 0 1 NULL NULL 2 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail formatted select tj1rnum, 
tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch 
Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"c1 (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Supports Key Types IS true","Not empty key IS true","When Fast Hash Table, then requires no Hybrid Hash Join IS true","Small table vectorizes IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input 
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out index b224da8..e5afab4 100644 --- ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -76,12 +76,16 @@ POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(invent POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -93,20 +97,50 @@ STAGE PLANS: TableScan alias: inventory_part_0 Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -195,12 +229,16 @@ POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(in POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -212,20 +250,50 @@ STAGE PLANS: TableScan alias: inventory_part_1 Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -316,12 +384,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2a POSTHOOK: Input: default@inventory_part_2a@par=2 POSTHOOK: Output: default@inventory_part_2a@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,20 +405,50 @@ STAGE PLANS: TableScan alias: inventory_part_2a Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -418,12 +520,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2b POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 POSTHOOK: Output: 
default@inventory_part_2b@par1=2/par2=3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_2b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -435,20 +541,50 @@ STAGE PLANS: TableScan alias: inventory_part_2b Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -526,12 +662,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_3 POSTHOOK: Input: default@inventory_part_3@par=2 POSTHOOK: Output: default@inventory_part_3@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(inv_quantity_on_hand) from inventory_part_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -543,20 +683,50 @@ STAGE PLANS: TableScan alias: inventory_part_3 Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Select Operator expressions: inv_quantity_on_hand (type: int) 
outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out index 09dd873..8b41ab4 100644 --- ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out @@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -274,17 +278,41 @@ STAGE PLANS: TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) @@ -310,6 +338,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) @@ -365,12 +401,16 @@ Chicago New York 2010-10-24 2010-10-24 07:00:00 113.0 897 Chicago New York 2010-10-25 2010-10-25 07:00:00 -1.0 897 Chicago New York 2010-10-26 2010-10-26 07:00:00 0.0 897 Chicago New York 2010-10-27 2010-10-27 07:00:00 -11.0 897 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -382,12 +422,25 @@ STAGE PLANS: TableScan alias: flights_tiny_orc Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: 
VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2 + native: false keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -396,9 +449,26 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -819,28 +889,63 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_date + Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch 
Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY @@ -1011,12 +1116,16 @@ Chicago New York 2010-10-31 07:00:00 -4.0 1531 2010-10-31 Chicago New York 2010-10-31 07:00:00 -22.0 1610 2010-10-31 Chicago New York 2010-10-31 07:00:00 -15.0 3198 2010-10-31 Washington New York 2010-10-31 07:00:00 -18.0 7282 2010-10-31 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1029,17 +1138,41 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) @@ -1065,6 +1198,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 5775 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) @@ -1144,12 +1285,16 @@ Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 Chicago New York 2010-10-25 07:00:00 -1.0 897 2010-10-25 Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1161,12 +1306,25 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_date Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -1175,9 +1333,26 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + 
enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1622,28 +1797,63 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- 2010-10-29 07:00:00 12 2010-10-30 07:00:00 11 2010-10-31 07:00:00 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: flights_tiny_orc_partitioned_timestamp + Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Select Operator + expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: flights_tiny_orc_partitioned_timestamp - Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY @@ -1814,12 +2024,16 @@ Chicago New York 2010-10-31 -4.0 1531 2010-10-31 07:00:00 Chicago New York 2010-10-31 -22.0 1610 2010-10-31 07:00:00 Chicago New York 2010-10-31 -15.0 3198 2010-10-31 07:00:00 Washington New York 2010-10-31 -18.0 7282 2010-10-31 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization expression select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1832,17 +2046,41 @@ STAGE PLANS: TableScan alias: flights_tiny_orc_partitioned_timestamp Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) @@ -1868,6 +2106,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 6175 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) @@ -1947,12 +2193,16 @@ Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 Chicago New York 2010-10-25 -1.0 897 2010-10-25 07:00:00 Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 
 Chicago New York 2010-10-27 -11.0 897 2010-10-27 07:00:00
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1964,12 +2214,25 @@ STAGE PLANS:
           TableScan
             alias: flights_tiny_orc_partitioned_timestamp
             Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5]
             Select Operator
               expressions: fl_time (type: timestamp)
               outputColumnNames: fl_time
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [5]
               Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountStar(*) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 5
+                    native: false
                 keys: fl_time (type: timestamp)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -1978,9 +2241,26 @@ STAGE PLANS:
                   key expressions: _col0 (type: timestamp)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: timestamp)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/vector_reduce1.q.out ql/src/test/results/clientpositive/vector_reduce1.q.out
index fe69ebd..37964dc 100644
--- ql/src/test/results/clientpositive/vector_reduce1.q.out
+++ ql/src/test/results/clientpositive/vector_reduce1.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select b from vectortab2korc
order by b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select b from vectortab2korc order by b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,15 +126,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: b (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_reduce2.q.out ql/src/test/results/clientpositive/vector_reduce2.q.out index 22d95ad..c70979b 100644 --- ql/src/test/results/clientpositive/vector_reduce2.q.out +++ ql/src/test/results/clientpositive/vector_reduce2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s, i, s2 from vectortab2korc order by s, i, s2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,15 +126,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12] Select Operator expressions: s (type: string), i (type: int), s2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 2, 9] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce3.q.out ql/src/test/results/clientpositive/vector_reduce3.q.out index c7dffd8..6b85bc9 100644 --- ql/src/test/results/clientpositive/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/vector_reduce3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -122,15 +126,39 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 5d7000a..96aa587 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -40,11 +44,25 @@ STAGE PLANS: TableScan alias: decimal_test Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:long) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:long) -> boolean) -> boolean predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10) + 
className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -53,10 +71,27 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -81,6 +116,14 @@ STAGE PLANS: Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index 39a64d8..f13fff0 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -97,16 +97,20 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS 
`none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,21 +122,43 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [7, 12, 11] + selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -281,20 +307,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN 
VECTORIZATION EXPRESSION SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -307,11 +337,25 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [19] + selectExpressions: StringGroupConcatColCol(col 17, col 18)(children: StringGroupColConcatStringScalar(col 18, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 17)(children: CastLongToString(col 13)(children: CastDoubleToLong(col 15)(children: DoubleColAddDoubleScalar(col 16, val 1.0)(children: DoubleColDivideDoubleScalar(col 15, val 3.0)(children: CastLongToDouble(col 14)(children: LongColSubtractLongScalar(col 13, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 13:long) -> 14:long) -> 15:double) -> 16:double) -> 15:double) -> 13:long) -> 17:String) -> 18:String_Family) -> 17:String_Family, CastLongToString(col 13)(children: VectorUDFYearDate(col 12, field YEAR) -> 13:long) -> 18:String) -> 19:String_Family Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 19 + native: false + projectedOutputColumns: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -320,9 +364,26 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -345,6 +406,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE 
Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_string_decimal.q.out ql/src/test/results/clientpositive/vector_string_decimal.q.out index e0a3563..06b0e4f 100644 --- ql/src/test/results/clientpositive/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/vector_string_decimal.q.out @@ -40,12 +40,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_decimal POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from orc_decimal where id in ('100000000', '200000000') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from orc_decimal where id in ('100000000', '200000000') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,6 +75,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. 
Column type is decimal(18,0) but the common type is string + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_struct_in.q.out ql/src/test/results/clientpositive/vector_struct_in.q.out index 3971f55..5ca42da 100644 --- ql/src/test/results/clientpositive/vector_struct_in.q.out +++ ql/src/test/results/clientpositive/vector_struct_in.q.out @@ -22,7 +22,7 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -36,7 +36,7 @@ struct('nine','1'), struct('ten','1') ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -50,6 +50,10 @@ struct('nine','1'), struct('ten','1') ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -61,21 +65,43 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -115,7 +141,7 @@ POSTHOOK: Input: default@test_1 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -129,7 +155,7 @@ struct('nine','1'), 
struct('ten','1') ) as b from test_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -143,6 +169,10 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -154,18 +184,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -225,7 +274,7 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -239,7 +288,7 @@ struct(9,1), struct(10,1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -253,6 +302,10 @@ struct(9,1), struct(10,1) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -264,21 +317,43 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], 
fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), lineid (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -318,7 +393,7 @@ POSTHOOK: Input: default@test_2 #### A masked pattern was here #### 1 1 7 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -332,7 +407,7 @@ struct(9,1), struct(10,1) ) as b from test_2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -346,6 +421,10 @@ struct(9,1), struct(10,1) ) as b from test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -357,18 +436,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -428,7 +526,7 @@ POSTHOOK: Input: default@values__tmp__table__3 POSTHOOK: Output: default@test_3 POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -442,7 +540,7 @@ struct('nine',1), struct('ten',1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -456,6 +554,10 @@ struct('nine',1), struct('ten',1) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -467,21 +569,43 @@ STAGE PLANS: TableScan alias: test_3 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -521,7 +645,7 @@ POSTHOOK: Input: default@test_3 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -535,7 +659,7 @@ struct('nine',1), struct('ten',1) ) as b from test_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -549,6 +673,10 @@ struct('nine',1), struct('ten',1) ) 
as b from test_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -560,18 +688,37 @@ STAGE PLANS: TableScan alias: test_3 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -632,7 +779,7 @@ POSTHOOK: Output: default@test_4 POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -647,7 +794,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -662,6 +809,10 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -673,21 +824,43 @@ STAGE PLANS: TableScan alias: test_4 Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap 
[0, 1, 2]) -> boolean predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -728,7 +901,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_4 #### A masked pattern was here #### 1 a 0.5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -743,7 +916,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -758,6 +931,10 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -769,18 +946,37 @@ STAGE PLANS: TableScan alias: test_4 Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] Select Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 4] + selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: 
false Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index 7e5f24d..5d52cf4 100644 --- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -1,10 +1,14 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select 'key1', 'value1' from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select 'key1', 'value1' from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -17,18 +21,37 @@ STAGE PLANS: alias: alltypesorc Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: 'key1' (type: string), 'value1' (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val key1) -> 12:string, ConstantVectorExpression(val value1) -> 13:string Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -54,15 +77,19 @@ POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true 
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -81,12 +108,23 @@ STAGE PLANS: alias: alltypesorc Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: 17.29 (type: decimal(18,9)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9) Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -94,6 +132,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.decimal_2 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-7 Conditional Operator @@ -168,14 +214,18 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@decimal_2 POSTHOOK: Output: default@decimal_2 PREHOOK: query: -- Dummy tables HIVE-13190 -explain +explain vectorization expression select count(1) from (select * from (Select 1 a) x order by x.a) y PREHOOK: type: QUERY POSTHOOK: query: -- Dummy tables HIVE-13190 -explain +explain vectorization expression select count(1) from (select * from (Select 1 a) x order by x.a) y POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -195,6 +245,13 @@ STAGE PLANS: key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE @@ -218,6 +275,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -248,13 +313,17 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### _c0 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression 
create temporary table dual as select 1 PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression create temporary table dual as select 1 POSTHOOK: type: CREATETABLE_AS_SELECT Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -286,6 +355,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dual + Map Vectorization: + enabled: false +#### A masked pattern was here #### Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_udf1.q.out ql/src/test/results/clientpositive/vector_udf1.q.out index 0d8c206..9a3ef03 100644 --- ql/src/test/results/clientpositive/vector_udf1.q.out +++ ql/src/test/results/clientpositive/vector_udf1.q.out @@ -25,7 +25,7 @@ POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- UDFs with varchar support -explain +explain vectorization expression select concat(c1, c2), concat(c3, c4), @@ -33,13 +33,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- UDFs with varchar support -explain +explain vectorization expression select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,21 +55,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringGroupConcatColCol(col 0, col 1) -> 4:String_Family, StringGroupConcatColCol(col 2, col 3) -> 5:String_Family, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringGroupConcatColCol(col 0, col 1) -> 6:String_Family, CastStringGroupToString(col 7)(children: StringGroupConcatColCol(col 2, col 3) -> 7:String_Family) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -90,20 +116,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238val_238 238val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -115,21 +145,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringUpper(col 1) -> 4:String, StringUpper(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringUpper(col 1) -> 6:String, CastStringGroupToString(col 7)(children: StringUpper(col 3) -> 7:String) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -154,20 +206,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### VAL_238 VAL_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -179,21 +235,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic 
stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 9] + selectExpressions: StringLower(col 1) -> 4:String, StringLower(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 8)(children: StringLower(col 1) -> 6:String, CastStringGroupToString(col 7)(children: StringLower(col 3) -> 7:String) -> 8:String) -> 9:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -219,7 +297,7 @@ POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: -- Scalar UDFs -explain +explain vectorization expression select ascii(c2), ascii(c4), @@ -227,13 +305,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- Scalar UDFs -explain +explain vectorization expression select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -259,6 +341,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> ascii (Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -283,20 +371,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 118 118 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -322,6 +414,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFConcatWS(Const string |, Column[c1], Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -346,20 +444,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238|val_238 238|val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -385,6 +487,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFDecode(GenericUDFEncode(Column[c2], Const string US-ASCII), Const string US-ASCII) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -409,20 +517,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -448,6 +560,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFInstr(Column[c2], Const string _) not supported 
+ vectorized: false Stage: Stage-0 Fetch Operator @@ -472,20 +590,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 4 4 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -497,21 +619,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringLength(col 1) -> 4:Long, StringLength(col 3) -> 5:Long, LongColEqualLongColumn(col 6, col 7)(children: StringLength(col 1) -> 6:Long, StringLength(col 3) -> 7:Long) -> 8:long Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -536,20 +680,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 7 7 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -561,21 +709,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + 
native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6] + selectExpressions: ConstantVectorExpression(val 5) -> 4:long, ConstantVectorExpression(val 5) -> 5:long, ConstantVectorExpression(val 1) -> 6:long Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -600,20 +770,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 5 5 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -639,6 +813,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFLpad(Column[c2], Const int 15, Const string ) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -663,20 +843,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -688,21 +872,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: StringLTrim(col 1) -> 4:String, StringLTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringLTrim(col 1) -> 6:String, StringLTrim(col 3) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -727,20 +933,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -752,21 +962,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 4:Long, VectorUDFAdaptor(c4 regexp 'val') -> 5:Long, LongColEqualLongColumn(col 6, col 7)(children: VectorUDFAdaptor(c2 regexp 'val') -> 6:Long, VectorUDFAdaptor(c4 regexp 'val') -> 7:Long) -> 8:long Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -791,20 +1023,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -816,21 +1052,43 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 8] + selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 6:String, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:String) -> 8:boolean Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -855,20 +1113,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain 
+PREHOOK: query: explain vectorization expression
 select
   regexp_replace(c2, 'val', 'replaced'),
   regexp_replace(c4, 'val', 'replaced'),
   regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   regexp_replace(c2, 'val', 'replaced'),
   regexp_replace(c4, 'val', 'replaced'),
   regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -880,21 +1142,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
               outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:String, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:String) -> 8:boolean
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -919,20 +1203,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 replaced_238 replaced_238 true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   reverse(c2),
   reverse(c4),
   reverse(c2) = reverse(c4)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   reverse(c2),
   reverse(c4),
   reverse(c2) = reverse(c4)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -958,6 +1246,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> reverse (Column[c2]) not supported
+          vectorized: false

   Stage: Stage-0
     Fetch Operator
@@ -982,20 +1276,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 832_lav 832_lav true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   rpad(c2, 15, ' '),
   rpad(c4, 15, ' '),
   rpad(c2, 15, ' ') = rpad(c4, 15, ' ')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   rpad(c2, 15, ' '),
   rpad(c4, 15, ' '),
   rpad(c2, 15, ' ') = rpad(c4, 15, ' ')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1021,6 +1319,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFRpad(Column[c2], Const int 15, Const string ) not supported
+          vectorized: false

   Stage: Stage-0
     Fetch Operator
@@ -1045,20 +1349,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238 val_238 true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   rtrim(c2),
   rtrim(c4),
   rtrim(c2) = rtrim(c4)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   rtrim(c2),
   rtrim(c4),
   rtrim(c2) = rtrim(c4)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1070,21 +1378,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: StringRTrim(col 1) -> 4:String, StringRTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringRTrim(col 1) -> 6:String, StringRTrim(col 3) -> 7:String) -> 8:boolean
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -1109,18 +1439,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238 val_238 true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   sentences('See spot run. See jane run.'),
   sentences(cast('See spot run. See jane run.' as varchar(50)))
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   sentences('See spot run. See jane run.'),
   sentences(cast('See spot run. See jane run.' as varchar(50)))
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1146,6 +1480,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Data type array<array<string>> of GenericUDFSentences(Const string See spot run. See jane run.) not supported
+          vectorized: false

   Stage: Stage-0
     Fetch Operator
@@ -1168,18 +1508,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 [["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   split(c2, '_'),
   split(c4, '_')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   split(c2, '_'),
   split(c4, '_')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1205,6 +1549,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Data type array<string> of GenericUDFSplit(Column[c2], Const string _) not supported
+          vectorized: false

   Stage: Stage-0
     Fetch Operator
@@ -1227,18 +1577,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 ["val","238"] ["val","238"]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   str_to_map('a:1,b:2,c:3',',',':'),
   str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   str_to_map('a:1,b:2,c:3',',',':'),
   str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1264,6 +1618,12 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Data type map<string,string> of GenericUDFStringToMap(Const string a:1,b:2,c:3, Const string ,, Const string :) not supported
+          vectorized: false

   Stage: Stage-0
     Fetch Operator
@@ -1285,21 +1645,25 @@ from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
-{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"}
-PREHOOK: query: explain
+{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"}
+PREHOOK: query: explain vectorization expression
 select
   substr(c2, 1, 3),
   substr(c4, 1, 3),
   substr(c2, 1, 3) = substr(c4, 1, 3)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   substr(c2, 1, 3),
   substr(c4, 1, 3),
   substr(c2, 1, 3) = substr(c4, 1, 3)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1311,21 +1675,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: StringSubstrColStartLen(col 1, start 0, length 3) -> 4:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringSubstrColStartLen(col 1, start 0, length 3) -> 6:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 7:string) -> 8:boolean
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -1350,20 +1736,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val val true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   trim(c2),
   trim(c4),
   trim(c2) = trim(c4)
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   trim(c2),
   trim(c4),
   trim(c2) = trim(c4)
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1375,21 +1765,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 8]
+                  selectExpressions: StringTrim(col 1) -> 4:String, StringTrim(col 3) -> 5:String, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: StringTrim(col 1) -> 6:String, StringTrim(col 3) -> 7:String) -> 8:boolean
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -1415,19 +1827,23 @@ POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238 val_238 true
 PREHOOK: query: -- Aggregate Functions
-explain
+explain vectorization expression
 select
   compute_stats(c2, 16),
   compute_stats(c4, 16)
 from varchar_udf_1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Aggregate Functions
-explain
+explain vectorization expression
 select
   compute_stats(c2, 16),
   compute_stats(c4, 16)
 from varchar_udf_1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1452,6 +1868,16 @@ STAGE PLANS:
                   sort order:
                   Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: struct), _col1 (type: struct)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -1487,18 +1913,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   min(c2),
   min(c4)
 from varchar_udf_1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   min(c2),
   min(c4)
 from varchar_udf_1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1510,20 +1940,49 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
              expressions: c2 (type: string), c4 (type: varchar(20))
              outputColumnNames: c2, c4
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 3]
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: min(c2), min(c4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFMinString(col 3) -> string
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string), _col1 (type: varchar(20))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), min(VALUE._col1)
@@ -1559,18 +2018,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238 val_238
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   max(c2),
   max(c4)
 from varchar_udf_1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   max(c2),
   max(c4)
 from varchar_udf_1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1582,20 +2045,49 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_1
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
              expressions: c2 (type: string), c4 (type: varchar(20))
              outputColumnNames: c2, c4
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 3]
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: max(c2), max(c4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxString(col 1) -> string, VectorUDAFMaxString(col 3) -> string
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string), _col1 (type: varchar(20))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: max(VALUE._col0), max(VALUE._col1)
diff --git ql/src/test/results/clientpositive/vector_udf2.q.out ql/src/test/results/clientpositive/vector_udf2.q.out
index 12c083f..0b5f164 100644
--- ql/src/test/results/clientpositive/vector_udf2.q.out
+++ ql/src/test/results/clientpositive/vector_udf2.q.out
@@ -24,7 +24,7 @@ POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:
 POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   c1 LIKE '%38%',
   c2 LIKE 'val_%',
@@ -34,7 +34,7 @@ select
   c3 LIKE '%x38'
 from varchar_udf_2 limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   c1 LIKE '%38%',
   c2 LIKE 'val_%',
@@ -44,6 +44,10 @@ select
   c3 LIKE '%x38'
 from varchar_udf_2 limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -55,21 +59,43 @@ STAGE PLANS:
           TableScan
             alias: varchar_udf_2
             Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5, 6, 7, 8, 9]
+                  selectExpressions: SelectStringColLikeStringScalar(col 0) -> 4:String_Family, SelectStringColLikeStringScalar(col 1) -> 5:String_Family, SelectStringColLikeStringScalar(col 2) -> 6:String_Family, SelectStringColLikeStringScalar(col 0) -> 7:String_Family, SelectStringColLikeStringScalar(col 1) -> 8:String_Family, SelectStringColLikeStringScalar(col 2) -> 9:String_Family
               Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 1
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_udf3.q.out ql/src/test/results/clientpositive/vector_udf3.q.out
index 7c6a90a..818a888 100644
--- ql/src/test/results/clientpositive/vector_udf3.q.out
+++ ql/src/test/results/clientpositive/vector_udf3.q.out
@@ -4,10 +4,14 @@ PREHOOK: Output: rot13
 POSTHOOK: query: CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'
 POSTHOOK: type: CREATEFUNCTION
 POSTHOOK: Output: rot13
-PREHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT rot13(cstring1) from alltypesorc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -19,18 +23,37 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
               expressions: Rot13(cstring1) (type: string)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [12]
+                  selectExpressions: VectorStringRot13(col 6) -> 12:String
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out
index f7c9cd0..205c67a 100644
--- ql/src/test/results/clientpositive/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
@@ -144,12 +148,23 @@ STAGE PLANS:
           TableScan
             alias: vectortab2korc
             Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
             Select Operator
               expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
+                  selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
               Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -157,6 +172,14 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
                     name: default.varchar_lazy_binary_columnar
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true

   Stage: Stage-7
     Conditional Operator
diff --git ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
index a6c4de9..2141c76 100644
--- ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
@@ -125,11 +125,15 @@ POSTHOOK: Output: default@varchar_join1_str_orc
 POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ]
 PREHOOK: query: -- Join varchar with same length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Join varchar with same length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -186,8 +190,20 @@ STAGE PLANS:
             Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10))
@@ -219,11 +235,15 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc
 2 abc 2 abc
 3 abc 3 abc
 PREHOOK: query: -- Join varchar with different length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Join varchar with different length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -280,8 +300,20 @@ STAGE PLANS:
             Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20))
@@ -315,11 +347,15 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc
 2 abc 2 abc
 3 abc 3 abc
 PREHOOK: query: -- Join varchar with string
-explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Join varchar with string
-explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -376,8 +412,20 @@ STAGE PLANS:
             Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index 49d8b34..63d13e6 100644
--- ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0 val_0
 10 val_10
 100 val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -77,6 +81,18 @@ STAGE PLANS:
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: varchar(20))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -139,16 +155,20 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -171,6 +191,18 @@ STAGE PLANS:
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: varchar(20))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
@@ -236,12 +268,16 @@ create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -254,19 +290,46 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
               expressions: cint (type: int)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [2]
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 10
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col0 (type: int)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: int)
diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out
index 5c981fb..665e92f 100644
--- ql/src/test/results/clientpositive/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/vector_when_case_null.q.out
@@ -20,12 +20,16 @@
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@count_case_groupby
 POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
 POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -37,12 +41,27 @@ STAGE PLANS:
           TableScan
             alias: count_case_groupby
             Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
               outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 3]
+                  selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:Long
               Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(_col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCount(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: [0]
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -51,9 +70,26 @@ STAGE PLANS:
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/vectorization_0.q.out ql/src/test/results/clientpositive/vectorization_0.q.out
index ac33721..d6b71ab 100644
--- ql/src/test/results/clientpositive/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/vectorization_0.q.out
@@ -1,7 +1,7 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -12,7 +12,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -32,20 +36,50 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint)
              outputColumnNames: ctinyint
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
@@ -68,6 +102,14 @@ STAGE PLANS:
             sort order: +
             Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -106,16 +148,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64 62 9173 12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -128,20 +174,50 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint)
              outputColumnNames: ctinyint
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(ctinyint)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
@@ -163,6 +239,14 @@ STAGE PLANS:
             key expressions: _col0 (type: bigint)
             sort order: +
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint)
@@ -195,7 +279,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39856
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(ctinyint) as c1,
    variance(ctinyint),
@@ -208,7 +292,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(ctinyint) as c1,
    variance(ctinyint),
@@ -221,6 +305,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -247,6 +335,18 @@ STAGE PLANS:
             Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -269,6 +369,14 @@ STAGE PLANS:
             sort order: +
             Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
@@ -317,7 +425,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -325,7 +433,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -333,6 +441,10 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -345,20 +457,50 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: cbigint (type: bigint)
              outputColumnNames: cbigint
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [3]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: min(cbigint), max(cbigint), count(cbigint), count()
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
@@ -381,6 +523,14 @@ STAGE PLANS:
             sort order: +
             Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -419,16 +569,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -2147311592 2145498388 9173 12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -441,20 +595,50 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: cbigint (type: bigint)
              outputColumnNames: cbigint
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [3]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(cbigint)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
          aggregations: sum(VALUE._col0)
@@ -476,6 +660,14 @@ STAGE PLANS:
             key expressions: _col0 (type: bigint)
             sort order: +
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint)
@@ -508,7 +700,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1698460028409
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -521,7 +713,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT
    avg(cbigint) as c1,
    variance(cbigint),
@@ -534,6 +726,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -560,6 +756,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -582,6 +790,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -630,7 +846,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -638,7 +854,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -646,6 +862,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -658,20 +878,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -694,6 +944,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -732,16 +990,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -754,20 +1016,50 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 4) -> double + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -789,6 +1081,14 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) @@ -821,7 +1121,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -834,7 +1134,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -847,6 +1147,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -873,6 +1177,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -895,6 +1211,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -944,7 +1268,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -971,7 +1295,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -998,6 +1322,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1009,15 +1337,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or 
((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 4, 0] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE @@ -1026,6 +1372,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out index e82e474..a973b71 100644 --- ql/src/test/results/clientpositive/vectorization_13.q.out +++ ql/src/test/results/clientpositive/vectorization_13.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -35,7 +35,7 @@ LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -80,15 +84,34 @@ STAGE PLANS: TableScan alias: alltypesorc 
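Note on the Group By Vectorization annotations that recur in these plans (e.g. "VectorUDAFSumLong(col 0) -> bigint"): each listed aggregator consumes one whole column of a VectorizedRowBatch per call rather than one row at a time. The following is a conceptual sketch only — not the generated VectorUDAFSumLong class itself — showing the batch-at-a-time pattern, including the isRepeating fast path and the selection-vector handling that the vectorized operators rely on; the class name SumLongSketch is invented for illustration.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SumLongSketch {
  private long sum = 0;

  public void aggregateBatch(VectorizedRowBatch batch, int colNum) {
    if (batch.size == 0) {
      return;
    }
    LongColumnVector inputCol = (LongColumnVector) batch.cols[colNum];
    long[] vector = inputCol.vector;

    if (inputCol.isRepeating) {
      // One value stands in for every row of the batch.
      if (inputCol.noNulls || !inputCol.isNull[0]) {
        sum += vector[0] * (long) batch.size;
      }
    } else if (inputCol.noNulls && !batch.selectedInUse) {
      // Fast path: tight loop, no null or selection checks.
      for (int i = 0; i < batch.size; i++) {
        sum += vector[i];
      }
    } else {
      // General path: honor the selection vector and per-row nulls.
      for (int j = 0; j < batch.size; j++) {
        int i = batch.selectedInUse ? batch.selected[j] : j;
        if (inputCol.noNulls || !inputCol.isNull[i]) {
          sum += vector[i];
        }
      }
    }
  }

  public long getSum() {
    return sum;
  }
}

The per-batch loop is why "className: VectorGroupByOperator" plans can stay native up to the aggregation even when, as the vectorOutputConditionsNotMet lines note, STRUCT-typed intermediate results (avg, stddev_pop, var_samp) force row-mode output to the reducer.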
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -100,6 +123,18 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce 
Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -127,6 +162,14 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) @@ -258,7 +301,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -292,7 +335,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -325,6 +368,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -337,15 +384,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 
-1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 0, 8, 4, 6] Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10, col 0, col 8, col 4, col 6 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -357,6 +423,18 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -384,6 +462,14 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce 
Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 34923d6..6578585 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -100,6 +104,18 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) @@ -127,6 +143,14 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index a2edab4..80f60b0 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -96,6 +100,18 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) @@ -123,6 +139,14 @@ STAGE PLANS: sort order: +++++++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out index c297239..8d3e7e7 100644 --- ql/src/test/results/clientpositive/vectorization_16.q.out +++ ql/src/test/results/clientpositive/vectorization_16.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -73,6 +77,18 @@ STAGE PLANS: Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/vectorization_17.q.out ql/src/test/results/clientpositive/vectorization_17.q.out index e01bea2..af6791e 100644 --- ql/src/test/results/clientpositive/vectorization_17.q.out +++ ql/src/test/results/clientpositive/vectorization_17.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -50,6 +50,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -74,6 +78,18 @@ STAGE PLANS: Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: 
NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out index 25e7657..7aba74a 100644 --- ql/src/test/results/clientpositive/vectorization_7.q.out +++ ql/src/test/results/clientpositive/vectorization_7.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -29,7 +29,7 @@ LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -56,6 +56,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,19 +71,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num 
rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), 
KEY.reducesinkkey14 (type: tinyint) @@ -184,7 +217,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -212,7 +245,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, @@ -239,6 +272,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -250,19 +287,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] + selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, 
LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorization_8.q.out ql/src/test/results/clientpositive/vectorization_8.q.out index 3fa8f53..b3ef46e 100644 --- ql/src/test/results/clientpositive/vectorization_8.q.out +++ ql/src/test/results/clientpositive/vectorization_8.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -27,7 +27,7 @@ LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -63,19 +67,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE 
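The enabled / enableConditionsMet / enableConditionsNotMet triples repeated throughout these plans follow one pattern: a fixed list of named preconditions is evaluated, each condition's description is appended to a "met" or "not met" list, and the feature is enabled only when the "not met" list is empty. A hypothetical sketch of that reporting pattern — not Hive's actual Vectorizer code; the class name, the helper, and the hard-coded example settings (MR engine, reduce vectorization on) are all assumptions chosen to reproduce the Reduce Vectorization output seen above:

import java.util.ArrayList;
import java.util.List;
import java.util.function.BooleanSupplier;

public class VectorizationConditionsSketch {
  public static void main(String[] args) {
    // Assumed example settings matching the MR-engine runs above.
    boolean reduceVectorizationEnabled = true; // hive.vectorized.execution.reduce.enabled
    String executionEngine = "mr";             // hive.execution.engine

    List<String> met = new ArrayList<>();
    List<String> notMet = new ArrayList<>();

    check("hive.vectorized.execution.reduce.enabled IS true",
        () -> reduceVectorizationEnabled, met, notMet);
    check("hive.execution.engine " + executionEngine + " IN [tez, spark] IS false",
        () -> executionEngine.equals("tez") || executionEngine.equals("spark"),
        met, notMet);

    // Enabled only when every condition held; the two lists become the
    // enableConditionsMet / enableConditionsNotMet lines in the plan.
    System.out.println("enabled: " + notMet.isEmpty());
    System.out.println("enableConditionsMet: " + String.join(", ", met));
    System.out.println("enableConditionsNotMet: " + String.join(", ", notMet));
  }

  // Files the condition's description under "met" or "not met".
  private static void check(String description, BooleanSupplier condition,
      List<String> met, List<String> notMet) {
    (condition.getAsBoolean() ? met : notMet).add(description);
  }
}

Run on these settings it prints "enabled: false" with the engine condition in the not-met list — the same shape as every Reduce Vectorization block in this diff, since MR is neither tez nor spark.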
diff --git ql/src/test/results/clientpositive/vectorization_8.q.out ql/src/test/results/clientpositive/vectorization_8.q.out
index 3fa8f53..b3ef46e 100644
--- ql/src/test/results/clientpositive/vectorization_8.q.out
+++ ql/src/test/results/clientpositive/vectorization_8.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -27,7 +27,7 @@ LIMIT 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL)
 ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -63,19 +67,48 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
               predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
+                    selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
                   sort order: ++++++++++++++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
@@ -171,7 +204,7 @@ POSTHOOK: Input: default@alltypesorc
 1969-12-31 15:59:43.807	-7196.0	NULL	NULL	42.0	7196.0	1557.8500000000004	1849372.0	NULL	-5.98226333E8	7196.0	-43.389	-42.0	NULL
 1969-12-31 15:59:43.82	-7196.0	NULL	NULL	-30.0	7196.0	1557.8500000000004	1849372.0	NULL	1.329550715E9	7196.0	28.611	30.0	NULL
 PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -197,7 +230,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5,
 LIMIT 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -222,6 +255,10 @@ WHERE (((cstring2 IS NOT NULL)
 ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -233,19 +270,48 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
               predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
+                    selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
                   sort order: ++++++++++++++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
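A note on the Reduce Sink Vectorization blocks above: the sink stays non-native here only because these golden files run on plain MR with a TopN in play. A hedged sketch of how the listed conditions would flip (the property names are copied from the nativeConditions lists above; the engine switch is an assumption about the environment, not something this patch runs):

    SET hive.execution.engine=tez;  -- clears 'hive.execution.engine mr IN [tez, spark] IS false'
    SET hive.vectorized.execution.reducesink.new.enabled=true;  -- already met above
    -- Dropping the LIMIT removes the 'No TopN IS false' condition as well:
    explain vectorization expression
    SELECT ctimestamp1, cdouble FROM alltypesorc ORDER BY ctimestamp1, cdouble;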
diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out
index c297239..8d3e7e7 100644
--- ql/src/test/results/clientpositive/vectorization_9.q.out
+++ ql/src/test/results/clientpositive/vectorization_9.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%')
 OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -73,6 +77,18 @@ STAGE PLANS:
             Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
diff --git ql/src/test/results/clientpositive/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/vectorization_decimal_date.q.out
index 6cae52c..d2b56ef 100644
--- ql/src/test/results/clientpositive/vectorization_decimal_date.q.out
+++ ql/src/test/results/clientpositive/vectorization_decimal_date.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -27,24 +31,49 @@ STAGE PLANS:
           TableScan
             alias: date_decimal_test
             Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
               predicate: (cint is not null and cdouble is not null) (type: boolean)
               Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cdate (type: date), cdecimal (type: decimal(20,10))
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [2, 3]
                 Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 10
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
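The date_decimal_test plan above is a useful small example: every operator from scan through limit is native and only the final file sink is not, which is why the vertex still reports allNative: false. For reference, a hypothetical reconstruction of the fixture behind it, inferred from the POSTHOOK lineage lines (the exact cast expressions are assumptions; the real .q file derives cdate and cdecimal from alltypesorc):

    CREATE TABLE date_decimal_test STORED AS ORC AS
    SELECT cint,
           cdouble,
           CAST(CAST(cint AS timestamp) AS date) AS cdate,   -- lineage: EXPRESSION (assumed cast)
           CAST(cdouble AS decimal(20,10)) AS cdecimal       -- lineage: EXPRESSION (assumed cast)
    FROM alltypesorc;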
diff --git ql/src/test/results/clientpositive/vectorization_div0.q.out ql/src/test/results/clientpositive/vectorization_div0.q.out
index 63af5c9..2b2bbbe 100644
--- ql/src/test/results/clientpositive/vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/vectorization_div0.q.out
@@ -1,11 +1,15 @@
 PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -17,21 +21,43 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
               expressions: (cdouble / 0.0) (type: double)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [12]
+                  selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 100
+                Limit Vectorization:
+                    className: VectorLimitOperator
+                    native: true
                 Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -150,17 +176,21 @@ NULL
 PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,20 +202,49 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean
               predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
               Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
                 outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [12, 15, 17]
+                    selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21)
                 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: bigint), _col1 (type: double)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col2 (type: decimal(22,21))
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21))
@@ -320,16 +379,20 @@ POSTHOOK: Input: default@alltypesorc
 60330397	NULL	0.000000019890470801974
 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
 PREHOOK: type: QUERY
 POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization expression
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -341,20 +404,49 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean
               predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
               Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col4, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [12, 15, 16, 14, 17]
+                    selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double
                 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: double), _col1 (type: double)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double)
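The three div0 plans above all vectorize the division itself (DoubleColDivideDoubleScalar and DoubleColDivideDoubleColumn), and the golden results confirm that vectorized and row-mode execution agree on the semantics: division by zero yields NULL rather than an error or infinity. A small illustration using the same table, with queries modeled on the ones above:

    -- Constant zero denominator: every returned row is NULL.
    select cdouble / 0.0 from alltypesorc limit 5;

    -- Computed zero denominator: (cdouble + 200.0) is 0 exactly when cdouble = -200.0.
    select cdouble / (cdouble + 200.0) from alltypesorc where cdouble = -200.0 limit 5;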
diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out
index eb2a692..8fb773b 100644
--- ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -1,8 +1,12 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
 PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,6 +36,14 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -57,14 +69,18 @@ POSTHOOK: Input: default@alltypesorc
 -1887561756	9531.0
 PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
-explain
+explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -76,20 +92,48 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
               predicate: ctinyint is not null (type: boolean)
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
                 outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 5, 1]
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint), _col1 (type: double)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col2 (type: smallint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
@@ -141,13 +185,17 @@ POSTHOOK: Input: default@alltypesorc
 -64	-1600.0	-1600
 -64	-200.0	-200
 PREHOOK: query: -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- deduped RS
-explain
+explain vectorization expression
 select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -159,12 +207,28 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double)
              outputColumnNames: _col0, _col1
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 12]
+                 selectExpressions: DoubleColAddDoubleScalar(col 5, val 1.0) -> 12:double
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: avg(_col1)
+               Group By Vectorization:
+                   aggregators: VectorUDAFAvgDouble(col 12) -> struct
+                   className: VectorGroupByOperator
+                   vectorOutput: false
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: [0]
+                   vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false
                keys: _col0 (type: tinyint)
                mode: hash
                outputColumnNames: _col0, _col1
@@ -177,6 +241,18 @@ STAGE PLANS:
                  TopN Hash Memory Usage: 0.3
                  value expressions: _col1 (type: struct)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: avg(VALUE._col0)
@@ -230,13 +306,17 @@ NULL	9370.0945309795
 -47	-574.6428571428571
 -46	3033.55
 PREHOOK: query: -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- distincts
-explain
+explain vectorization expression
 select distinct(ctinyint) from alltypesorc limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -248,11 +328,24 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint)
              outputColumnNames: ctinyint
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   projectedOutputColumns: []
                keys: ctinyint (type: tinyint)
                mode: hash
                outputColumnNames: _col0
@@ -261,9 +354,26 @@ STAGE PLANS:
                  key expressions: _col0 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: tinyint)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.3
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
          keys: KEY._col0 (type: tinyint)
@@ -315,12 +425,16 @@ NULL
 -48
 -47
 -46
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -332,12 +446,26 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint), cdouble (type: double)
              outputColumnNames: ctinyint, cdouble
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+                 projectedOutputColumns: [0, 5]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(DISTINCT cdouble)
+               Group By Vectorization:
+                   aggregators: VectorUDAFCount(col 5) -> bigint
+                   className: VectorGroupByOperator
+                   vectorOutput: true
+                   keyExpressions: col 0, col 5
+                   native: false
+                   projectedOutputColumns: [0]
                keys: ctinyint (type: tinyint), cdouble (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
@@ -346,9 +474,26 @@ STAGE PLANS:
                  key expressions: _col0 (type: tinyint), _col1 (type: double)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: tinyint)
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.3
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -402,13 +547,17 @@ NULL	2932
 -47	22
 -46	24
 PREHOOK: query: -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- limit zero
-explain
+explain vectorization expression
 select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -428,13 +577,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 PREHOOK: query: -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2MR (applied to last RS)
-explain
+explain vectorization expression
 select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -447,15 +600,33 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: ctinyint is not null (type: boolean)
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cdouble (type: double), ctinyint (type: tinyint)
                outputColumnNames: cdouble, ctinyint
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [5, 0]
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(ctinyint)
+                 Group By Vectorization:
+                     aggregators: VectorUDAFSumLong(col 0) -> bigint
+                     className: VectorGroupByOperator
+                     vectorOutput: true
+                     keyExpressions: col 5
+                     native: false
+                     projectedOutputColumns: [0]
                  keys: cdouble (type: double)
                  mode: hash
                  outputColumnNames: _col0, _col1
@@ -464,9 +635,26 @@ STAGE PLANS:
                    key expressions: _col0 (type: double)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: double)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkOperator
+                       native: false
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                       nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: bigint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
          aggregations: sum(VALUE._col0)
@@ -490,6 +678,14 @@ STAGE PLANS:
              sort order: ++
              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
              TopN Hash Memory Usage: 0.3
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
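The Group By Vectorization blocks in this file show the struct-intermediate rule clearly: sum and count carry primitive intermediates, so the map-side hash group-by keeps vectorOutput: true, while avg (and stddev/variance elsewhere in this patch) produce a struct intermediate, so the operator reports vectorOutput: false and the whole vertex downgrades to groupByVectorOutput: false. Both explains below are copied from the test file above:

    -- Primitive intermediate (bigint): vectorOutput: true.
    explain vectorization expression
    select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;

    -- Struct intermediate: vectorOutput: false, groupByVectorOutput: false.
    explain vectorization expression
    select ctinyint, avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;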
diff --git ql/src/test/results/clientpositive/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
index 7f41283..d8a8980 100644
--- ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
@@ -1,8 +1,12 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -33,6 +37,14 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator
@@ -51,12 +63,16 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1887561756	10361.0
 -1887561756	-8881.0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,20 +84,48 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
              predicate: ctinyint is not null (type: boolean)
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
                outputColumnNames: _col0, _col1, _col2
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [0, 5, 1]
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint), _col1 (type: double)
                  sort order: ++
+                 Reduce Sink Vectorization:
+                     className: VectorReduceSinkOperator
+                     native: false
+                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col2 (type: smallint)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
diff --git ql/src/test/results/clientpositive/vectorization_part_project.q.out ql/src/test/results/clientpositive/vectorization_part_project.q.out
index 5463c36..49e0b56 100644
--- ql/src/test/results/clientpositive/vectorization_part_project.q.out
+++ ql/src/test/results/clientpositive/vectorization_part_project.q.out
@@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype
 POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
-PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
+PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
+POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,6 +75,18 @@ STAGE PLANS:
             Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: double)
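The vectorization_offset_limit file above and the pushdown file that follows both open with the planner's warning about comparing a bigint with a double: cbigint < cdouble forces the exact bigint onto the approximate double scale, where large values lose precision. A hedged sketch of the usual remedy (not part of this patch; the explicit cast simply makes the lossy conversion visible in the query text):

    SELECT AVG(cbigint) FROM alltypesorc WHERE CAST(cbigint AS DOUBLE) < cdouble;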
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,6 +36,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorization_short_regress.q.out ql/src/test/results/clientpositive/vectorization_short_regress.q.out index a101668..2147000 100644 --- ql/src/test/results/clientpositive/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,6 +136,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,15 +151,32 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, 
FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 1, 4, 0] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: false + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE @@ -162,6 +185,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) @@ -268,7 +303,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -306,7 +342,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -339,6 +376,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -350,15 +391,32 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 5, 0] Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), 
stddev_samp(csmallint), var_samp(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE @@ -367,6 +425,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) @@ -467,7 +537,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -504,7 +575,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -536,6 +608,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -547,15 +623,32 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2, 5] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE @@ -564,6 +657,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce 
Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) @@ -663,7 +768,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -690,7 +796,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION EXPRESSION +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -712,6 +819,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -723,15 +834,32 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 2, 4] Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float + className: VectorGroupByOperator + vectorOutput: false + native: false + vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE @@ -740,6 +868,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) @@ -819,7 +959,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -860,7 +1001,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -896,6 +1038,10 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -907,19 +1053,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: 
FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30] + selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(20,18) Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18)) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(20,18)) @@ -1073,7 +1248,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1113,7 +1289,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cbigint, cstring1, cboolean1, @@ -1148,6 +1325,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, 
c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1159,19 +1340,48 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28] + selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: 
DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) @@ -1276,7 +1486,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, 
Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1315,7 +1526,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1349,6 +1561,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1360,20 +1576,49 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS 
decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28] + selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(19,18), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: 
string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) @@ -1549,7 +1794,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1581,7 +1827,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1608,6 +1855,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1619,20 +1870,49 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), 
(UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24] + selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), 
KEY.reducesinkkey14 (type: double) @@ -1763,7 +2043,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1788,7 +2069,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION EXPRESSION +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1808,6 +2090,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1820,15 +2106,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: csmallint, cbigint, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 0] Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1840,6 +2144,18 @@ STAGE PLANS: Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) @@ -1867,6 +2183,14 @@ STAGE PLANS: sort order: +++++++++++ Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) @@ -1959,7 +2283,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1991,7 +2316,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2018,6 +2344,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2030,15 +2360,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: 
FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 4] Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + Group By Vectorization: + aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 5 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -2050,6 +2398,18 @@ STAGE PLANS: Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) @@ -2077,6 +2437,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) @@ -2160,7 +2528,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2220,7 +2589,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2275,6 +2645,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2287,15 +2661,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and 
cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 8, col 6 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2307,6 +2699,18 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) @@ -2334,6 +2738,14 @@ STAGE PLANS: sort order: +++++++++++++++++++++++++++++++++++++++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) @@ -2525,7 +2937,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2570,7 +2983,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2610,6 +3024,10 @@ WHERE (((cboolean1 IS NOT 
NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2622,15 +3040,33 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 10 + native: false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS 
false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2642,6 +3078,18 @@ STAGE PLANS: Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) @@ -2669,6 +3117,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) @@ -2784,12 +3240,16 @@ create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count 
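The hunks above and below all make the same mechanical change: the plain "explain" prefix of each test query becomes one of the new EXPLAIN VECTORIZATION forms, and the golden output gains the PLAN VECTORIZATION header plus per-stage Map Vectorization and Reduce Vectorization summaries. As a minimal sketch of the two detail levels exercised in this patch (run against the test_count table created just above; the SET line simply restates the condition the summaries report as enabledConditionsMet, and no further detail levels are assumed beyond the two that appear in these diffs):

SET hive.vectorized.execution.enabled=true;

-- Summary form: prints the PLAN VECTORIZATION header plus the per-stage
-- Map Vectorization / Reduce Vectorization sections (enabled, inputFileFormats,
-- allNative, usesVectorUDFAdaptor, ...).
EXPLAIN VECTORIZATION
SELECT count(*) FROM test_count;

-- EXPRESSION form: additionally annotates each operator with its vectorized
-- class and expressions, e.g. VectorGroupByOperator with
-- aggregators: VectorUDAFCount(col 0) -> bigint.
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(i) FROM test_count;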
-PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2801,18 +3261,47 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2842,12 +3331,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2859,20 +3352,49 @@ STAGE PLANS: TableScan alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash 
outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2976,12 +3498,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2993,18 +3519,47 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3034,12 +3589,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3051,20 +3610,49 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3094,12 +3682,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization 
expression select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3111,20 +3703,49 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3154,12 +3775,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3171,20 +3796,49 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash 
outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3214,12 +3868,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3231,20 +3889,49 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 6) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3274,12 +3961,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3291,20 +3982,49 @@ STAGE PLANS: TableScan alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out index d68f343..21d3a7f 100644 --- ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, 
type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -111,7 +115,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -121,16 +132,34 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -152,12 +181,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -169,7 +202,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -179,16 +219,34 @@ STAGE PLANS: 0 
key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -215,7 +273,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box @@ -223,9 +281,13 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -237,7 +299,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -247,16 +316,34 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 08c1412..17ec618 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,21 +49,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true 
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -109,7 +136,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -127,7 +154,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -145,6 +172,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -156,21 +187,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_casts.q.out ql/src/test/results/clientpositive/vectorized_casts.q.out index 8520125..14ef46b 100644 --- 
ql/src/test/results/clientpositive/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -156,6 +156,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -182,6 +186,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out index 913d07c..dac9b6b 100644 --- ql/src/test/results/clientpositive/vectorized_context.q.out +++ ql/src/test/results/clientpositive/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-7 is a root stage Stage-5 depends on stages: Stage-7 @@ -187,6 +191,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 68a2a4d..4f2503a 100644 
--- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -233,6 +233,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,18 +248,37 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1) -> 2:long, VectorUDFYearTimestamp(col 1, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 1, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 8:long, CastTimestampToDate(col 1) -> 9:date, VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateAddColScalar(col 1, val 2) -> 11:date, VectorUDFDateSubColScalar(col 1, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 13:long Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -434,7 +457,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288533600 2010 10 31 31 1 43 
2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -449,7 +472,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -464,6 +487,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -475,18 +502,37 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12] + selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long, VectorUDFMonthDate(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 8:long, VectorUDFDateLong(col 0) -> 9:date, VectorUDFDateAddColScalar(col 0, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 12:long Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -665,7 +711,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 
2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -679,7 +725,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -693,6 +739,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -704,18 +754,37 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 2, 3, 12, 13, 16] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 1, field YEAR) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 1, field MONTH) -> 2:long, VectorUDFMonthDate(col 0, field MONTH) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 2:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 3:long) -> 9:long, LongColEqualLongColumn(col 10, col 0)(children: CastTimestampToDate(col 1) -> 10:date) -> 2:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateLong(col 0) -> 11:date) -> 3:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateAddColScalar(col 1, val 2) -> 10:date, VectorUDFDateAddColScalar(col 0, val 2) -> 11:date) -> 12:long, 
LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateSubColScalar(col 1, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date) -> 13:long, LongColEqualLongColumn(col 14, col 15)(children: VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 14:long, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 15:long) -> 16:long Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -894,7 +963,7 @@ true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -903,7 +972,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -912,6 +981,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -923,21 +996,43 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 4, 5, 6, 8] + selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long 
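Both selectExpressions lists above follow the same compilation pattern: child expressions write into scratch columns (col 2, col 10, col 14, ...) and the parent expression reads those scratch columns and fills its own output column, with boolean results carried as 0/1 longs. Below is a minimal sketch of the simplest parent in these plans, a LongColEqualLongColumn-style equality; the class and method names are illustrative stand-ins, not Hive's template-generated code, and null propagation plus the isRepeating fast paths are omitted for brevity.

// Sketch of a vectorized long==long comparison producing a 0/1 long column,
// as in "LongColEqualLongColumn(col 2, col 3) -> 4:long" above. Illustrative only.
public class LongColEqualLongColumnSketch {

  // One pass over the batch; the ternary keeps the inner loop branch-light.
  public static void evaluate(long[] left, long[] right, long[] out, int n) {
    for (int i = 0; i < n; i++) {
      out[i] = (left[i] == right[i]) ? 1L : 0L;
    }
  }

  public static void main(String[] args) {
    // e.g. year(fl_time) vs year(fl_date) scratch columns for three rows
    long[] yearFromTimestamp = {2010, 2010, 2011};
    long[] yearFromDate      = {2010, 2009, 2011};
    long[] result = new long[3];
    evaluate(yearFromTimestamp, yearFromDate, result, 3);
    System.out.println(java.util.Arrays.toString(result)); // prints [1, 0, 1]
  }
}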
Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -988,7 +1083,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -996,7 +1091,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1004,6 +1099,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1016,20 +1115,50 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(fl_date), max(fl_date), count(fl_date), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -1052,6 +1181,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index f8ae962..81292ec 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -47,6 +51,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) @@ -76,10 +92,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), 
count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,6 +126,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0) diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index b414551..2e51dd6 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -45,12 +49,23 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -58,14 +73,31 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then 
requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: LongColAddLongColumn(col 0, col 1) -> 2:long Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 0) -> int, VectorUDAFAvgLong(col 2) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE @@ -74,8 +106,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 73b61a2..1a913fb 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@y POSTHOOK: Lineage: y.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(1) from x, y where a = b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(1) from x, y where a = b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -73,12 +77,23 @@ STAGE PLANS: TableScan alias: y Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: b is not null 
(type: boolean) Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,19 +101,47 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 49 Data size: 199 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 0:long) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/vectorized_math_funcs.q.out index 7b7dedc..830f532 100644 --- ql/src/test/results/clientpositive/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_math_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
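One detail worth pausing on from the vectorized_mapjoin2.q.out plan above: the aggregator is printed as VectorUDAFCount(ConstantVectorExpression(val 1) -> 0:long) -> bigint, meaning the literal 1 in count(1) is materialized once as a repeating constant column, and the COUNT aggregator then effectively just sums batch sizes. A rough sketch of that interaction follows; LongColumn, constantVectorExpression, and vectorUDAFCount are hypothetical names used for illustration, not Hive's actual classes or signatures.

// Illustrative sketch (assumed names, not the Hive API) of counting over a
// repeating constant column, as in the count(1) plan above.
public class CountConstantSketch {

  static final class LongColumn {
    long[] vector = new long[1024];
    boolean isRepeating; // true => only vector[0] is meaningful
  }

  // "ConstantVectorExpression(val 1)": write the literal once, mark repeating.
  static void constantVectorExpression(LongColumn out, long val) {
    out.vector[0] = val;
    out.isRepeating = true;
  }

  // "VectorUDAFCount": a never-null constant contributes batchSize rows.
  static long vectorUDAFCount(LongColumn col, int batchSize, long runningCount) {
    return runningCount + batchSize;
  }

  public static void main(String[] args) {
    LongColumn one = new LongColumn();
    constantVectorExpression(one, 1L);
    long count = 0;
    count = vectorUDAFCount(one, 1024, count); // a full batch
    count = vectorUDAFCount(one, 37, count);   // the final partial batch
    System.out.println(count);                 // prints 1061
  }
}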
-explain +explain vectorization expression select cdouble ,Round(cdouble, 2) @@ -106,6 +106,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -117,21 +121,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 
20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:Double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:Double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:String, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:String, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out index ab22912..1acf646 100644 --- ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint 
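The vectorized_math_funcs.q.out plan above is the only one in this section whose Map Vectorization summary reports usesVectorUDFAdaptor: true: log(2, cdouble), power(log2(cdouble), 2), hex(cdouble), and conv(cbigint, 10, 16) have no dedicated vectorized kernel, so they appear as VectorUDFAdaptor(...) entries and are evaluated row by row inside the batch. The sketch below shows the shape of that fallback under the assumption of a plain double[] batch; the class and method names are invented for illustration and are not Hive's actual adaptor.

import java.util.function.DoubleUnaryOperator;

// Illustrative row-at-a-time fallback: the batch interface is preserved, but
// each element goes through a scalar UDF call instead of a vector kernel.
public class VectorUDFAdaptorSketch {

  static void adapt(double[] in, double[] out, int n, DoubleUnaryOperator scalarUdf) {
    for (int i = 0; i < n; i++) {
      out[i] = scalarUdf.applyAsDouble(in[i]); // per-row call, no tight primitive loop
    }
  }

  public static void main(String[] args) {
    double[] cdouble = {2.0, 8.0, 1024.0};
    double[] result = new double[3];
    // log(2, cdouble) from the query, computed here as ln(x)/ln(2) per row.
    adapt(cdouble, result, 3, x -> Math.log(x) / Math.log(2.0));
    System.out.println(java.util.Arrays.toString(result)); // prints [1.0, 3.0, 10.0]
  }
}

This is also why the adaptor path is merely flagged rather than rejected: the map side still reports vectorized: true, and only the individual adapted expressions lose the primitive inner loop.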
+explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -96,8 +100,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_parquet.q.out ql/src/test/results/clientpositive/vectorized_parquet.q.out index 6cba9f1..e0e3d58 100644 --- ql/src/test/results/clientpositive/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet.q.out @@ -46,16 +46,20 @@ POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldS POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select * +PREHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * +POSTHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -84,6 +88,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 Fetch Operator @@ -115,7 +123,7 @@ POSTHOOK: Input: default@alltypes_parquet 528534767 27 -7824 27.0 -7824.0 cvLH6Eat2yFsyy7p 528534767 -11 -15431 -11.0 -15431.0 cvLH6Eat2yFsyy7p 528534767 61 -15549 61.0 -15549.0 cvLH6Eat2yFsyy7p -PREHOOK: query: explain select ctinyint, +PREHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -124,7 +132,7 @@ PREHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain select ctinyint, +POSTHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -133,6 
+141,10 @@ POSTHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -160,6 +172,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out index 281fe93..36b8e9d 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -118,15 +118,19 @@ POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] PREHOOK: query: -- select -explain +explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types PREHOOK: type: QUERY POSTHOOK: query: -- select -explain +explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -149,6 +153,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 Fetch Operator @@ -188,12 +196,16 @@ POSTHOOK: Input: default@parquet_types 119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83 120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04 121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde 90.33 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), 
cdecimal, SIGN(cdecimal) FROM parquet_types POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -216,6 +228,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 Fetch Operator @@ -253,7 +269,7 @@ uvwzy 5 abcdede 7 4.76 1 vwxyz 5 abcdede 7 12.83 1 wxyza 5 abcde 5 73.04 1 bcdef 5 abcde 5 90.33 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -265,7 +281,7 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -277,6 +293,10 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -305,6 +325,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) @@ -328,6 +356,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2)) diff --git ql/src/test/results/clientpositive/vectorized_ptf.q.out ql/src/test/results/clientpositive/vectorized_ptf.q.out index 7fb966a..8aacbbe 100644 --- ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -120,7 +120,7 @@ POSTHOOK: 
Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] PREHOOK: query: --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -132,7 +132,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -142,6 +142,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -165,6 +169,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -215,6 +227,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -266,6 +282,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -292,6 +312,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -414,7 +438,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -423,13 +447,17 @@ sort by j.p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 2. 
testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -473,6 +501,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -523,6 +554,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [j:p1, j:p2] Needs Tagging: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -564,6 +599,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -590,6 +629,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -641,6 +684,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -667,6 +714,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -772,7 +823,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -780,12 +831,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 3. 
testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -808,6 +863,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -858,6 +921,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -952,7 +1019,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -964,7 +1031,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -974,6 +1041,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -997,6 +1068,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1047,6 +1126,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1098,6 +1181,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> 
Partition: @@ -1124,6 +1211,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1246,7 +1337,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1258,7 +1349,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1268,6 +1359,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1291,6 +1386,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1341,6 +1444,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1392,6 +1499,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1418,6 +1529,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1541,7 +1656,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. 
testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1554,7 +1669,7 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY POSTHOOK: query: -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1565,6 +1680,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1588,6 +1707,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1638,6 +1765,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -1697,6 +1828,10 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1723,6 +1858,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -1848,7 +1987,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -1857,13 +1996,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 7. 
testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1887,6 +2030,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1937,6 +2088,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -2008,6 +2163,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2079,6 +2237,10 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -2163,7 +2325,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -2172,13 +2334,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 8. 
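
Annotation: in the join tests the stage that scans both inputs reports a different blocker — "Vectorized map work only works with 1 TableScanOperator" — so the limitation is structural rather than a configuration switch. One hypothetical way such a stage could vectorize (an assumption on my part, not something this diff asserts) is converting to a map join, which leaves a single TableScan in the big-table map work:

-- Hypothetical workaround; whether the small side fits in memory is assumed.
SET hive.auto.convert.join=true;
EXPLAIN VECTORIZATION EXTENDED
SELECT abc.*
FROM noop(ON part_orc PARTITION BY p_mfgr ORDER BY p_name) abc
JOIN part_orc p1 ON abc.p_partkey = p1.p_partkey;
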
testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -2202,6 +2368,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2252,6 +2426,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) @@ -2323,6 +2501,9 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2394,6 +2575,10 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -2482,7 +2667,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -2491,13 +2676,17 @@ order by p_name, p_size desc) PREHOOK: type: QUERY POSTHOOK: query: -- 9. 
testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2535,6 +2724,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2585,6 +2780,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2636,6 +2835,10 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2662,6 +2865,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) @@ -2765,7 +2972,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2776,7 +2983,7 @@ from noopwithmap(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 10. 
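
Annotation: tests 9 and 10 switch from noop to noopwithmap, which runs the PTF in the map phase. The map work then prints a notVectorizedReason — "PTF Operator (PTF) not supported" — and vectorized: false even though the input-format condition is met. The plain noop variants keep the PTF in the reducer, so their map scans stay vectorized; an illustrative contrast using the same query shape:

-- Same ordering through plain noop(): the PTF moves to the reducer and the
-- ORC map scan reports vectorized: true.
EXPLAIN VECTORIZATION EXTENDED
SELECT p_mfgr, p_name, p_size,
       rank() OVER (PARTITION BY p_mfgr ORDER BY p_name, p_size DESC) AS r
FROM noop(ON part_orc PARTITION BY p_mfgr ORDER BY p_name, p_size DESC);
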
testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2785,6 +2992,10 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2823,6 +3034,12 @@ STAGE PLANS: tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2873,6 +3090,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2925,6 +3146,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2951,6 +3176,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3071,7 +3300,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3082,7 +3311,7 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 11. 
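
Annotation: every follow-up MR stage reads the previous stage's intermediate data as SequenceFile rather than ORC, so its map work reports enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false. A sketch of the switch that condition names — an available option, not something this patch recommends; a related knob, hive.vectorized.use.row.serde.deserialize, covers row-mode serdes:

-- With vector serde deserialization on, non-columnar input such as
-- SequenceFile/LazySimpleSerDe can be read directly into vectorized row batches.
SET hive.vectorized.use.vector.serde.deserialize=true;
EXPLAIN VECTORIZATION EXTENDED
SELECT DISTINCT p_mfgr, p_name, p_size
FROM noop(ON part_orc PARTITION BY p_mfgr ORDER BY p_name);
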
testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3091,6 +3320,10 @@ from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3114,6 +3347,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3164,6 +3405,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3215,6 +3460,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3241,6 +3490,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3361,7 +3614,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3373,7 +3626,7 @@ order by p_mfgr, p_name PREHOOK: type: QUERY POSTHOOK: query: -- 12. 
testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3383,6 +3636,10 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3407,6 +3664,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3457,6 +3722,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3531,6 +3800,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3557,6 +3830,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3616,6 +3893,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3642,6 +3923,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3764,7 +4049,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. 
testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3779,7 +4064,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 PREHOOK: type: QUERY POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3792,6 +4077,10 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3815,6 +4104,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3865,6 +4162,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [sub1:part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3916,6 +4217,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3942,6 +4247,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4062,7 +4371,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -4076,7 +4385,7 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 14. 
testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -4088,6 +4397,10 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4112,6 +4425,14 @@ STAGE PLANS: value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4162,6 +4483,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4233,6 +4558,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4304,6 +4632,10 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -4345,6 +4677,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4371,6 +4707,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -4512,7 +4852,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. 
testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -4520,12 +4860,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 15. testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4549,6 +4893,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4599,6 +4951,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -4658,6 +5014,10 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4684,6 +5044,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) @@ -4778,7 +5142,7 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part_orc POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4786,7 +5150,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4794,6 +5158,10 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4827,6 +5195,14 @@ STAGE PLANS: value expressions: _col2 (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true 
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4877,6 +5253,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [mfgr_price_view:part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -4930,6 +5310,10 @@ STAGE PLANS: tag: -1 value expressions: _col2 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4956,6 +5340,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) @@ -5096,7 +5484,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -5112,7 +5500,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -5128,6 +5516,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -5156,6 +5548,14 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5206,6 +5606,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: 
int), VALUE._col5 (type: double) @@ -5274,6 +5678,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5300,6 +5708,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -5420,6 +5832,10 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5446,6 +5862,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -5507,6 +5927,10 @@ STAGE PLANS: tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5533,6 +5957,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5766,7 +6194,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5785,7 +6213,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 18. 
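
Annotation: the mechanical change in this diff is explain extended → explain vectorization extended (and, in the files further down, EXPLAIN → EXPLAIN VECTORIZATION EXPRESSION or plain explain vectorization). As these outputs suggest, the keyword after VECTORIZATION controls how much vectorization detail is printed; a sketch of the variants exercised here, from least to most verbose:

EXPLAIN VECTORIZATION SELECT count(*) FROM part_orc;            -- summary blocks only
EXPLAIN VECTORIZATION EXPRESSION SELECT count(*) FROM part_orc; -- adds per-operator/expression detail
EXPLAIN VECTORIZATION EXTENDED SELECT count(*) FROM part_orc;   -- combined with the classic extended plan
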
testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5802,6 +6230,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -5826,6 +6258,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5876,6 +6316,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5957,6 +6401,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5983,6 +6431,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6042,6 +6494,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6068,6 +6524,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6204,7 +6664,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6223,7 +6683,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6240,6 +6700,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6265,6 +6729,14 @@ STAGE PLANS: value expressions: p_name (type: string), p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6315,6 +6787,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6373,6 +6849,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6399,6 +6879,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6450,6 +6934,10 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6476,6 +6964,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6527,6 +7019,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6553,6 +7049,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: 
false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6689,7 +7189,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6706,7 +7206,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6721,6 +7221,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6745,6 +7249,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6795,6 +7307,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6853,6 +7369,10 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6879,6 +7399,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6937,6 +7461,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6963,6 +7491,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: 
false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -7095,7 +7627,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -7114,7 +7646,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -7131,6 +7663,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -7156,6 +7692,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7206,6 +7750,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -7264,6 +7812,10 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7290,6 +7842,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -7357,6 +7913,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7383,6 +7943,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + 
Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -7435,6 +7999,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7461,6 +8029,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -7597,7 +8169,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -7615,7 +8187,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -7631,6 +8203,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -7655,6 +8231,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7705,6 +8289,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -7779,6 +8367,10 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7805,6 +8397,10 @@ STAGE PLANS: Truncated Path -> 
Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -7864,6 +8460,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7890,6 +8490,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -8024,7 +8628,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -8040,7 +8644,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -8054,6 +8658,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -8078,6 +8686,14 @@ STAGE PLANS: value expressions: p_size (type: int) auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8128,6 +8744,10 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -8202,6 +8822,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8228,6 +8852,10 @@ STAGE PLANS: Truncated 
Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -8280,6 +8908,10 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int) auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8306,6 +8938,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out index cd05fd8..7c7468c 100644 --- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -50,6 +54,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -83,6 +94,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -105,6 +124,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) diff --git ql/src/test/results/clientpositive/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/vectorized_string_funcs.q.out index ca938b0..f04e17d 100644 --- ql/src/test/results/clientpositive/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,6 +46,10 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,6 +76,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out index d5d93ed..f1e1515 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -19,12 +19,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -64,12 +68,16 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 
23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,12 +135,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -144,18 +156,36 @@ STAGE PLANS: TableScan alias: test Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -173,12 +203,16 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,20 +224,50 @@ STAGE PLANS: TableScan alias: test Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: ts (type: timestamp) outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ts), max(ts) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, 
VectorUDAFMaxTimestamp(col 0) -> timestamp + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1) diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 58af3e2..37d4b9b 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -110,16 +114,41 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, 
VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -209,7 +238,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -222,7 +251,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -235,6 +264,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -246,16 +279,41 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), 
minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -345,7 +403,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -358,7 +416,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -371,6 +429,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -382,16 +444,41 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), 
_col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -484,7 +571,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -498,7 +585,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -511,6 +598,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -522,16 +613,41 @@ STAGE PLANS: TableScan alias: alltypesorc_wrong Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] + selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -584,20 +700,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -609,20 +729,50 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE 
value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -663,15 +813,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -696,6 +850,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggregation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP" (UDAF evaluator mode = PARTIAL1) + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -733,7 +897,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -744,7 +908,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -755,6 +919,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on
stages: Stage-1 @@ -766,12 +934,26 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + Group By Vectorization: + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + className: VectorGroupByOperator + vectorOutput: false + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE @@ -780,6 +962,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) diff --git ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out index 31afb2c..ca5b098 100644 --- 
ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -16,7 +16,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -34,6 +34,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,21 +49,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean predicate: ((cbigint % 250) = 0) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 26, 8, 27, 29] + selectExpressions: CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 27:Timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 28:string) -> 29:Timestamp Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -131,7 +158,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -149,7 +176,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -167,6 +194,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -178,21 +209,44 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean predicate: ((cbigint % 250) = 0) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 20, 8, 21, 23] + selectExpressions: CastLongToTimestamp(col 0) -> 13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 19:timestamp, CastLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:Timestamp, VectorUDFAdaptor(CAST( 
substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:Timestamp Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator
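
A recurring pattern in the new EXPLAIN VECTORIZATION output above is the per-task condition report: each Map/Reduce Vectorization block prints "enabled", then the conditions that were met and not met, and on MR every reduce task is reported disabled because "hive.execution.engine mr IN [tez, spark] IS false". Below is a minimal, self-contained Java sketch of how such a report can be assembled from the two reduce-side preconditions shown in the plans; the class and variable names are illustrative only, not Hive's actual Vectorizer API.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Illustrative sketch: evaluate the two reduce-side preconditions shown in
    // the EXPLAIN output and bucket them into "met" / "not met" lists, mirroring
    // the rendered plan text.
    public class ReduceVectorizationCheck {
        public static void main(String[] args) {
            boolean reduceVecEnabled = true;   // hive.vectorized.execution.reduce.enabled
            String executionEngine = "mr";     // hive.execution.engine
            List<String> supportedEngines = Arrays.asList("tez", "spark");

            List<String> met = new ArrayList<>();
            List<String> notMet = new ArrayList<>();

            String c1 = "hive.vectorized.execution.reduce.enabled IS " + reduceVecEnabled;
            (reduceVecEnabled ? met : notMet).add(c1);

            boolean engineOk = supportedEngines.contains(executionEngine);
            String c2 = "hive.execution.engine " + executionEngine
                + " IN " + supportedEngines + " IS " + engineOk;
            (engineOk ? met : notMet).add(c2);

            System.out.println("Reduce Vectorization:");
            System.out.println("    enabled: " + notMet.isEmpty());
            if (!met.isEmpty())    System.out.println("    enableConditionsMet: " + String.join(", ", met));
            if (!notMet.isEmpty()) System.out.println("    enableConditionsNotMet: " + String.join(", ", notMet));
        }
    }

Run under MR, this prints a block matching the ones in the diff: enabled false, with the engine condition under enableConditionsNotMet.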
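
The vectorized_timestamp.q.out hunks show hash-mode min/max over a timestamp column handled by VectorUDAFMinTimestamp and VectorUDAFMaxTimestamp. What such an aggregate reduces over a batch is just a running comparison of timestamp values; a per-batch sketch (millisecond precision only, for illustration; Hive's column vectors also track nanos):

    import java.sql.Timestamp;

    // Sketch of the reduction performed by a min/max timestamp aggregate over
    // one batch of non-null values.
    public class MinMaxTimestamp {
        public static void main(String[] args) {
            Timestamp[] batch = {
                Timestamp.valueOf("0001-01-01 00:00:00"),
                Timestamp.valueOf("9999-12-31 23:59:59")
            };
            Timestamp min = batch[0], max = batch[0];
            for (Timestamp t : batch) {
                if (t.before(min)) min = t;
                if (t.after(max))  max = t;
            }
            System.out.println("min = " + min + ", max = " + max);
        }
    }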
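
For the timestamp-typed expressions in vectorized_timestamp_funcs.q.out, the printed "field" parameter (field YEAR, field DAY_OF_MONTH, field HOUR_OF_DAY, ...) names a java.util.Calendar field extracted per row, which is why day() and dayofmonth() both render as VectorUDFDayOfMonthTimestamp. A per-row sketch of that extraction (not the batched Hive code itself):

    import java.sql.Timestamp;
    import java.util.Calendar;

    // Per-row sketch of the Calendar-field extraction implied by expressions
    // such as VectorUDFYearTimestamp(col 0, field YEAR).
    public class TimestampFields {
        public static void main(String[] args) {
            Timestamp ts = Timestamp.valueOf("2013-04-10 00:43:46");
            Calendar cal = Calendar.getInstance();
            cal.setTimeInMillis(ts.getTime());
            System.out.println("YEAR         = " + cal.get(Calendar.YEAR));
            System.out.println("MONTH        = " + (cal.get(Calendar.MONTH) + 1)); // Calendar months are 0-based
            System.out.println("DAY_OF_MONTH = " + cal.get(Calendar.DAY_OF_MONTH));
            System.out.println("WEEK_OF_YEAR = " + cal.get(Calendar.WEEK_OF_YEAR));
            System.out.println("HOUR_OF_DAY  = " + cal.get(Calendar.HOUR_OF_DAY));
            System.out.println("MINUTE       = " + cal.get(Calendar.MINUTE));
            System.out.println("SECOND       = " + cal.get(Calendar.SECOND));
        }
    }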
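
The string-typed variants print fixed offsets instead: VectorUDFYearString(col 1, fieldStart 0, fieldLength 4), VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2), and so on. Those offsets are exactly the field positions of a canonical "yyyy-MM-dd HH:mm:ss" string, which also explains the alltypesorc_wrong results: a value that does not match the fixed layout cannot be parsed at those offsets, so every extracted field is NULL. A minimal sketch of the offset scheme, assuming a well-formed input:

    // Sketch of the fixed-offset parsing implied by the "fieldStart N,
    // fieldLength M" parameters of the string-based timestamp expressions.
    public class StringTimestampFields {
        static int field(String ts, int fieldStart, int fieldLength) {
            return Integer.parseInt(ts.substring(fieldStart, fieldStart + fieldLength));
        }

        public static void main(String[] args) {
            String ts = "2013-04-10 00:43:46";
            System.out.println("year   = " + field(ts, 0, 4));   // fieldStart 0,  fieldLength 4
            System.out.println("month  = " + field(ts, 5, 2));   // fieldStart 5,  fieldLength 2
            System.out.println("day    = " + field(ts, 8, 2));   // fieldStart 8,  fieldLength 2
            System.out.println("hour   = " + field(ts, 11, 2));  // fieldStart 11, fieldLength 2
            System.out.println("minute = " + field(ts, 14, 2));  // fieldStart 14, fieldLength 2
            System.out.println("second = " + field(ts, 17, 2));  // fieldStart 17, fieldLength 2
        }
    }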
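
The comparison query's plan nests these expressions: LongColEqualLongColumn(col 2, col 3)(children: ...) -> 5:long means the two child expressions first fill scratch long columns 2 and 3, then the parent writes boolean results, carried as 0/1 longs, into output column 5. A scalar sketch of that parent step over plain arrays:

    import java.util.Arrays;

    // Sketch of the elementwise equality written by LongColEqualLongColumn:
    // booleans are represented as 0/1 in a long output column.
    public class LongColEqualLongColumnSketch {
        static long[] equalCols(long[] a, long[] b, int n) {
            long[] out = new long[n];
            for (int i = 0; i < n; i++) {
                out[i] = (a[i] == b[i]) ? 1L : 0L;
            }
            return out;
        }

        public static void main(String[] args) {
            long[] col2 = {2013L, 2013L, 1970L}; // e.g. year(ctimestamp1) per row
            long[] col3 = {2013L, 2012L, 1970L}; // e.g. year(stimestamp1) per row
            System.out.println(Arrays.toString(equalCols(col2, col3, 3))); // [1, 0, 1]
        }
    }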
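
The avg/variance plan reports vectorOutput: false with conditions like "Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false": the hash-mode partial for these aggregates is a struct (for avg, a count and a sum), and a struct has no primitive column-vector representation, so the group-by falls back to row-mode output for the shuffle while the map work stays vectorized. The sum(timestamp) plan goes further and shows a full map-side fallback (vectorized: false) because no vector aggregate exists at all for that type. A sketch of the struct partial, with the class shape chosen purely for illustration:

    // Sketch of a (count, sum) partial for avg: this pair is what travels to
    // the reducer as a struct, which a primitive column vector cannot carry.
    public class AvgPartial {
        static final class Partial { long count; double sum; }

        public static void main(String[] args) {
            double[] values = {1.0, 2.0, 4.0};
            Partial p = new Partial();
            for (double v : values) { p.count++; p.sum += v; }
            // The reducer merges partials and finalizes: avg = sum / count.
            System.out.println("partial = (" + p.count + ", " + p.sum + ")");
            System.out.println("avg     = " + (p.sum / p.count));
        }
    }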
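
Finally, vectorized_timestamp_ints_casts.q.out contains the same query explained twice, once compiling to CastMillisecondsLongToTimestamp and once to CastLongToTimestamp. The difference is whether an integer is read as milliseconds or as seconds since the epoch; presumably the test flips Hive's integer-to-timestamp conversion setting (hive.int.timestamp.conversion.in.seconds) between the two runs. A sketch of the two interpretations:

    import java.sql.Timestamp;

    // Sketch of the two integer-to-timestamp interpretations behind
    // CastMillisecondsLongToTimestamp and CastLongToTimestamp.
    public class LongToTimestampCasts {
        public static void main(String[] args) {
            long v = -45L; // e.g. a ctinyint value from alltypesorc
            Timestamp asMillis = new Timestamp(v);          // value is milliseconds
            Timestamp asSeconds = new Timestamp(v * 1000L); // value is seconds
            System.out.println("as milliseconds: " + asMillis);
            System.out.println("as seconds:      " + asSeconds);
        }
    }

This matches the sample rows in the diff, where the seconds-based plan produces timestamps much farther from the epoch than the milliseconds-based plan for the same column values. The remaining string casts, CAST(cstring1 AS TIMESTAMP), have no dedicated vector expression and go through VectorUDFAdaptor, which is why those plans report usesVectorUDFAdaptor: true.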